@@ -905,7 +905,7 @@ SZ_PUBLIC sz_cptr_t sz_find_westmere(sz_cptr_t h, sz_size_t h_length, sz_cptr_t
905905 sz_locate_needle_anomalies_ (n , n_length , & offset_first , & offset_mid , & offset_last );
906906
907907 // Broadcast those characters into XMM registers.
908- int matches ;
908+ sz_u32_vec_t matches_vec ;
909909 sz_u128_vec_t h_first_vec , h_mid_vec , h_last_vec , n_first_vec , n_mid_vec , n_last_vec ;
910910 n_first_vec .xmm = _mm_set1_epi8 (n [offset_first ]);
911911 n_mid_vec .xmm = _mm_set1_epi8 (n [offset_mid ]);
@@ -916,14 +916,14 @@ SZ_PUBLIC sz_cptr_t sz_find_westmere(sz_cptr_t h, sz_size_t h_length, sz_cptr_t
916916 h_first_vec .xmm = _mm_lddqu_si128 ((__m128i const * )(h + offset_first ));
917917 h_mid_vec .xmm = _mm_lddqu_si128 ((__m128i const * )(h + offset_mid ));
918918 h_last_vec .xmm = _mm_lddqu_si128 ((__m128i const * )(h + offset_last ));
919- matches = //
919+ matches_vec . i32 = //
920920 _mm_movemask_epi8 (_mm_cmpeq_epi8 (h_first_vec .xmm , n_first_vec .xmm )) &
921921 _mm_movemask_epi8 (_mm_cmpeq_epi8 (h_mid_vec .xmm , n_mid_vec .xmm )) &
922922 _mm_movemask_epi8 (_mm_cmpeq_epi8 (h_last_vec .xmm , n_last_vec .xmm ));
923- while (matches ) {
924- int potential_offset = sz_u32_ctz (matches );
923+ while (matches_vec . u32 ) {
924+ int potential_offset = sz_u32_ctz (matches_vec . u32 );
925925 if (sz_equal_westmere (h + potential_offset , n , n_length )) return h + potential_offset ;
926- matches &= matches - 1 ;
926+ matches_vec . u32 &= matches_vec . u32 - 1 ;
927927 }
928928 }
929929
@@ -940,7 +940,7 @@ SZ_PUBLIC sz_cptr_t sz_rfind_westmere(sz_cptr_t h, sz_size_t h_length, sz_cptr_t
940940 sz_locate_needle_anomalies_ (n , n_length , & offset_first , & offset_mid , & offset_last );
941941
942942 // Broadcast those characters into XMM registers.
943- int matches ;
943+ sz_u32_vec_t matches_vec ;
944944 sz_u128_vec_t h_first_vec , h_mid_vec , h_last_vec , n_first_vec , n_mid_vec , n_last_vec ;
945945 n_first_vec .xmm = _mm_set1_epi8 (n [offset_first ]);
946946 n_mid_vec .xmm = _mm_set1_epi8 (n [offset_mid ]);
@@ -953,15 +953,15 @@ SZ_PUBLIC sz_cptr_t sz_rfind_westmere(sz_cptr_t h, sz_size_t h_length, sz_cptr_t
953953 h_first_vec .xmm = _mm_lddqu_si128 ((__m128i const * )(h_reversed + offset_first ));
954954 h_mid_vec .xmm = _mm_lddqu_si128 ((__m128i const * )(h_reversed + offset_mid ));
955955 h_last_vec .xmm = _mm_lddqu_si128 ((__m128i const * )(h_reversed + offset_last ));
956- matches = //
956+ matches_vec . i32 = //
957957 _mm_movemask_epi8 (_mm_cmpeq_epi8 (h_first_vec .xmm , n_first_vec .xmm )) &
958958 _mm_movemask_epi8 (_mm_cmpeq_epi8 (h_mid_vec .xmm , n_mid_vec .xmm )) &
959959 _mm_movemask_epi8 (_mm_cmpeq_epi8 (h_last_vec .xmm , n_last_vec .xmm ));
960- while (matches ) {
961- int potential_offset = sz_u32_clz (matches ) - 16 ;
960+ while (matches_vec . u32 ) {
961+ int potential_offset = sz_u32_clz (matches_vec . u32 ) - 16 ;
962962 if (sz_equal_westmere (h + h_length - n_length - potential_offset , n , n_length ))
963963 return h + h_length - n_length - potential_offset ;
964- matches &= ~(1 << (15 - potential_offset ));
964+ matches_vec . u32 &= ~(1u << (15 - potential_offset ));
965965 }
966966 }
967967
@@ -1029,7 +1029,7 @@ SZ_PUBLIC sz_cptr_t sz_find_haswell(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n
10291029 sz_locate_needle_anomalies_ (n , n_length , & offset_first , & offset_mid , & offset_last );
10301030
10311031 // Broadcast those characters into YMM registers.
1032- int matches ;
1032+ sz_u32_vec_t matches_vec ;
10331033 sz_u256_vec_t h_first_vec , h_mid_vec , h_last_vec , n_first_vec , n_mid_vec , n_last_vec ;
10341034 n_first_vec .ymm = _mm256_set1_epi8 (n [offset_first ]);
10351035 n_mid_vec .ymm = _mm256_set1_epi8 (n [offset_mid ]);
@@ -1040,14 +1040,14 @@ SZ_PUBLIC sz_cptr_t sz_find_haswell(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n
10401040 h_first_vec .ymm = _mm256_lddqu_si256 ((__m256i const * )(h + offset_first ));
10411041 h_mid_vec .ymm = _mm256_lddqu_si256 ((__m256i const * )(h + offset_mid ));
10421042 h_last_vec .ymm = _mm256_lddqu_si256 ((__m256i const * )(h + offset_last ));
1043- matches = //
1043+ matches_vec . i32 = //
10441044 _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (h_first_vec .ymm , n_first_vec .ymm )) &
10451045 _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (h_mid_vec .ymm , n_mid_vec .ymm )) &
10461046 _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (h_last_vec .ymm , n_last_vec .ymm ));
1047- while (matches ) {
1048- int potential_offset = sz_u32_ctz (matches );
1047+ while (matches_vec . u32 ) {
1048+ int potential_offset = sz_u32_ctz (matches_vec . u32 );
10491049 if (sz_equal_haswell (h + potential_offset , n , n_length )) return h + potential_offset ;
1050- matches &= matches - 1 ;
1050+ matches_vec . u32 &= matches_vec . u32 - 1 ;
10511051 }
10521052 }
10531053
@@ -1065,7 +1065,7 @@ SZ_PUBLIC sz_cptr_t sz_rfind_haswell(sz_cptr_t h, sz_size_t h_length, sz_cptr_t
10651065 sz_locate_needle_anomalies_ (n , n_length , & offset_first , & offset_mid , & offset_last );
10661066
10671067 // Broadcast those characters into YMM registers.
1068- int matches ;
1068+ sz_u32_vec_t matches_vec ;
10691069 sz_u256_vec_t h_first_vec , h_mid_vec , h_last_vec , n_first_vec , n_mid_vec , n_last_vec ;
10701070 n_first_vec .ymm = _mm256_set1_epi8 (n [offset_first ]);
10711071 n_mid_vec .ymm = _mm256_set1_epi8 (n [offset_mid ]);
@@ -1078,15 +1078,15 @@ SZ_PUBLIC sz_cptr_t sz_rfind_haswell(sz_cptr_t h, sz_size_t h_length, sz_cptr_t
10781078 h_first_vec .ymm = _mm256_lddqu_si256 ((__m256i const * )(h_reversed + offset_first ));
10791079 h_mid_vec .ymm = _mm256_lddqu_si256 ((__m256i const * )(h_reversed + offset_mid ));
10801080 h_last_vec .ymm = _mm256_lddqu_si256 ((__m256i const * )(h_reversed + offset_last ));
1081- matches = //
1081+ matches_vec . i32 = //
10821082 _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (h_first_vec .ymm , n_first_vec .ymm )) &
10831083 _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (h_mid_vec .ymm , n_mid_vec .ymm )) &
10841084 _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (h_last_vec .ymm , n_last_vec .ymm ));
1085- while (matches ) {
1086- int potential_offset = sz_u32_clz (matches );
1085+ while (matches_vec . u32 ) {
1086+ int potential_offset = sz_u32_clz (matches_vec . u32 );
10871087 if (sz_equal_haswell (h + h_length - n_length - potential_offset , n , n_length ))
10881088 return h + h_length - n_length - potential_offset ;
1089- matches &= ~(1 << (31 - potential_offset ));
1089+ matches_vec . u32 &= ~(1u << (31 - potential_offset ));
10901090 }
10911091 }
10921092
@@ -1388,9 +1388,8 @@ SZ_PUBLIC sz_cptr_t sz_rfind_skylake(sz_cptr_t h, sz_size_t h_length, sz_cptr_t
13881388 int potential_offset = sz_u64_clz (matches );
13891389 if (n_length <= 3 || sz_equal_skylake (h + h_length - n_length - potential_offset , n , n_length ))
13901390 return h + h_length - n_length - potential_offset ;
1391- sz_assert_ ((matches & ((sz_u64_t )1 << (63 - potential_offset ))) != 0 &&
1392- "The bit must be set before we squash it" );
1393- matches &= ~((sz_u64_t )1 << (63 - potential_offset ));
1391+ sz_assert_ ((matches & (1ull << (63 - potential_offset ))) != 0 && "The bit must be set before we squash it" );
1392+ matches &= ~(1ull << (63 - potential_offset ));
13941393 }
13951394 }
13961395
@@ -1409,9 +1408,8 @@ SZ_PUBLIC sz_cptr_t sz_rfind_skylake(sz_cptr_t h, sz_size_t h_length, sz_cptr_t
14091408 int potential_offset = sz_u64_clz (matches );
14101409 if (n_length <= 3 || sz_equal_skylake (h + 64 - potential_offset - 1 , n , n_length ))
14111410 return h + 64 - potential_offset - 1 ;
1412- sz_assert_ ((matches & ((sz_u64_t )1 << (63 - potential_offset ))) != 0 &&
1413- "The bit must be set before we squash it" );
1414- matches &= ~((sz_u64_t )1 << (63 - potential_offset ));
1411+ sz_assert_ ((matches & (1ull << (63 - potential_offset ))) != 0 && "The bit must be set before we squash it" );
1412+ matches &= ~(1ull << (63 - potential_offset ));
14151413 }
14161414 }
14171415
0 commit comments