integrated patch with some SSE2/SSE4.2 optimizations from Grigory Frolov

This commit is contained in:
Vadim Pisarevsky
2012-07-24 17:24:31 +04:00
parent 54d68da8e7
commit b782d8bb53
7 changed files with 220 additions and 10 deletions

View File

@@ -221,6 +221,36 @@ static int countNonZero_(const T* src, int len )
return nz;
}
// Specialization of countNonZero_ for 8-bit unsigned data.
// Returns how many of the elements src[0] .. src[len-1] are not equal to zero.
// When compiled with CV_SSE4_2 and USE_SSE4_2 evaluates to true, the input is
// consumed 16 bytes at a time; whatever is left (or the whole array when the
// vector path is not taken) is handled by the scalar loop at the end.
template <>
int countNonZero_ <uchar> (const uchar* src, int len)
{
    int i = 0, nz = 0;
#if CV_SSE4_2
    if( USE_SSE4_2 ) // speed-up noted by the original author: 5x-6x
    {
        const __m128i zero = _mm_setzero_si128();
        for( ; i <= len - 16; i += 16 )
        {
            // Unaligned load: nothing here guarantees that src + i is 16-byte aligned.
            __m128i r0 = _mm_lddqu_si128((const __m128i*)(src + i));
            // Bytes equal to zero become 0xFF, all other bytes become 0x00.
            __m128i isZero = _mm_cmpeq_epi8(r0, zero);
            // Collapse to one bit per byte; a set bit marks a zero byte.
            int zeroMask = _mm_movemask_epi8(isZero);
            // 16 bytes per step minus the zero ones. _mm_popcnt_u32 is used
            // instead of _mm_popcnt_u64 because the 64-bit variant is not
            // available when targeting 32-bit x86, and working on the mask
            // avoids spilling the vector to a temporary buffer.
            nz += 16 - _mm_popcnt_u32((unsigned)zeroMask);
        }
    }
#endif
    // Scalar tail (and fallback when the SSE4.2 path is disabled).
    for( ; i < len; i++ )
        nz += src[i] != 0;
    return nz;
}
// Non-template entry point for 8-bit unsigned data: forwards src and len to
// countNonZero_ and returns its result unchanged.
static int countNonZero8u( const uchar* src, int len )
{
    const int nonZeroCount = countNonZero_(src, len);
    return nonZeroCount;
}
@@ -1982,4 +2012,4 @@ cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
cv::extractImageCOI(imgB, b);
return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);
}
}