@@ -1787,21 +1787,25 @@ class BilateralFilter_8u_Invoker :
1787
1787
#if CV_SSE3
1788
1788
if ( haveSSE3 )
1789
1789
{
1790
+ const __m128i izero = _mm_setzero_si128 ();
1790
1791
const __m128 _b0 = _mm_set1_ps (static_cast <float >(b0));
1791
1792
const __m128 _g0 = _mm_set1_ps (static_cast <float >(g0));
1792
1793
const __m128 _r0 = _mm_set1_ps (static_cast <float >(r0));
1793
1794
const __m128 _signMask = _mm_load_ps ((const float *)bufSignMask);
1794
1795
1795
1796
for ( ; k <= maxk - 4 ; k += 4 )
1796
1797
{
1797
- const uchar* sptr_k = sptr + j + space_ofs[k];
1798
- const uchar* sptr_k1 = sptr + j + space_ofs[k+1 ];
1799
- const uchar* sptr_k2 = sptr + j + space_ofs[k+2 ];
1800
- const uchar* sptr_k3 = sptr + j + space_ofs[k+3 ];
1801
-
1802
- __m128 _b = _mm_set_ps (sptr_k3[0 ],sptr_k2[0 ],sptr_k1[0 ],sptr_k[0 ]);
1803
- __m128 _g = _mm_set_ps (sptr_k3[1 ],sptr_k2[1 ],sptr_k1[1 ],sptr_k[1 ]);
1804
- __m128 _r = _mm_set_ps (sptr_k3[2 ],sptr_k2[2 ],sptr_k1[2 ],sptr_k[2 ]);
1798
+ const int * const sptr_k0 = reinterpret_cast <const int *>(sptr + j + space_ofs[k]);
1799
+ const int * const sptr_k1 = reinterpret_cast <const int *>(sptr + j + space_ofs[k+1 ]);
1800
+ const int * const sptr_k2 = reinterpret_cast <const int *>(sptr + j + space_ofs[k+2 ]);
1801
+ const int * const sptr_k3 = reinterpret_cast <const int *>(sptr + j + space_ofs[k+3 ]);
1802
+
1803
+ __m128 _b = _mm_cvtepi32_ps (_mm_unpacklo_epi16 (_mm_unpacklo_epi8 (_mm_cvtsi32_si128 (sptr_k0[0 ]), izero), izero));
1804
+ __m128 _g = _mm_cvtepi32_ps (_mm_unpacklo_epi16 (_mm_unpacklo_epi8 (_mm_cvtsi32_si128 (sptr_k1[0 ]), izero), izero));
1805
+ __m128 _r = _mm_cvtepi32_ps (_mm_unpacklo_epi16 (_mm_unpacklo_epi8 (_mm_cvtsi32_si128 (sptr_k2[0 ]), izero), izero));
1806
+ __m128 _z = _mm_cvtepi32_ps (_mm_unpacklo_epi16 (_mm_unpacklo_epi8 (_mm_cvtsi32_si128 (sptr_k3[0 ]), izero), izero));
1807
+
1808
+ _MM_TRANSPOSE4_PS (_b, _g, _r, _z);
1805
1809
1806
1810
__m128 bt = _mm_andnot_ps (_signMask, _mm_sub_ps (_b,_b0));
1807
1811
__m128 gt = _mm_andnot_ps (_signMask, _mm_sub_ps (_g,_g0));
0 commit comments