@@ -1798,26 +1798,27 @@ DEFN_ARITH_OPERATIONS(double)
1798
1798
DEFN_ARITH_OPERATIONS (half )
1799
1799
#endif // defined(cl_khr_fp16)
1800
1800
1801
- #define DEFN_WORK_GROUP_REDUCE (type , op , X ) \
1802
- { \
1803
- GET_MEMPOOL_PTR(data, type, true, 0) \
1804
- uint lid = __spirv_BuiltInLocalInvocationIndex(); \
1805
- uint lsize = __spirv_WorkgroupSize(); \
1806
- data[lid] = X; \
1807
- \
1808
- uint i = 1 << ( ((8 * sizeof(uint)) - __builtin_spirv_OpenCL_clz_i32(lsize - 1)) - 1); \
1809
- while(i > 0) \
1810
- { \
1801
+ #define DEFN_WORK_GROUP_REDUCE (type , op , identity , X ) \
1802
+ { \
1803
+ GET_MEMPOOL_PTR(data, type, true, 0) \
1804
+ uint lid = __spirv_BuiltInLocalInvocationIndex(); \
1805
+ uint lsize = __spirv_WorkgroupSize(); \
1806
+ data[lid] = X; \
1807
+ __builtin_spirv_OpControlBarrier_i32_i32_i32(Execution, 0, AcquireRelease | WorkgroupMemory); \
1808
+ uint mask = 1 << ( ((8 * sizeof(uint)) - __builtin_spirv_OpenCL_clz_i32(lsize - 1)) - 1) ; \
1809
+ while( mask > 0 ) \
1810
+ { \
1811
+ uint c = lid ^ mask; \
1812
+ type other = ( c < lsize ) ? data[ c ] : identity; \
1813
+ X = op( other, X ); \
1814
+ __builtin_spirv_OpControlBarrier_i32_i32_i32(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
1815
+ data[lid] = X; \
1811
1816
__builtin_spirv_OpControlBarrier_i32_i32_i32(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
1812
- if ((lid < i) && (lid + i < lsize)) \
1813
- { \
1814
- X = op(X, data[lid + i]); \
1815
- data[lid] = X; \
1816
- } \
1817
- i >>= 1; \
1818
- } \
1817
+ mask >>= 1; \
1818
+ } \
1819
+ type ret = data[0]; \
1819
1820
__builtin_spirv_OpControlBarrier_i32_i32_i32(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
1820
- return data[0]; \
1821
+ return ret; \
1821
1822
}
1822
1823
1823
1824
@@ -1987,7 +1988,7 @@ DEFN_ARITH_OPERATIONS(half)
1987
1988
{ \
1988
1989
switch(Operation){ \
1989
1990
case GroupOperationReduce: \
1990
- DEFN_WORK_GROUP_REDUCE(type, op, X) \
1991
+ DEFN_WORK_GROUP_REDUCE(type, op, identity, X) \
1991
1992
break; \
1992
1993
case GroupOperationInclusiveScan: \
1993
1994
DEFN_WORK_GROUP_SCAN_INCL(type, op, identity, X) \
0 commit comments