Merge pull request #2937 from ilya-lavrenov:sse_patch_nans
This commit is contained in:
commit
b2ab1b758a
@ -2543,12 +2543,33 @@ void patchNaNs( InputOutputArray _a, double _val )
|
||||
NAryMatIterator it(arrays, (uchar**)ptrs);
|
||||
size_t len = it.size*a.channels();
|
||||
Cv32suf val;
|
||||
val.f = (float)_val;
|
||||
float fval = (float)_val;
|
||||
val.f = fval;
|
||||
|
||||
#if CV_SSE2
|
||||
__m128i v_mask1 = _mm_set1_epi32(0x7fffffff), v_mask2 = _mm_set1_epi32(0x7f800000);
|
||||
__m128i v_val = _mm_set1_epi32(val.i);
|
||||
#endif
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||
{
|
||||
int* tptr = ptrs[0];
|
||||
for( size_t j = 0; j < len; j++ )
|
||||
size_t j = 0;
|
||||
|
||||
#if CV_SSE2
|
||||
if (USE_SSE2)
|
||||
{
|
||||
for ( ; j < len; j += 4)
|
||||
{
|
||||
__m128i v_src = _mm_loadu_si128((__m128i const *)(tptr + j));
|
||||
__m128i v_cmp_mask = _mm_cmplt_epi32(v_mask2, _mm_and_si128(v_src, v_mask1));
|
||||
__m128i v_res = _mm_or_si128(_mm_andnot_si128(v_cmp_mask, v_src), _mm_and_si128(v_cmp_mask, v_val));
|
||||
_mm_storeu_si128((__m128i *)(tptr + j), v_res);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for( ; j < len; j++ )
|
||||
if( (tptr[j] & 0x7fffffff) > 0x7f800000 )
|
||||
tptr[j] = val.i;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user