SSE2 optimization of cv::preCornerDetect
This commit is contained in:
parent
2d81595ed4
commit
654bdde8ed
@ -608,6 +608,11 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
|
||||
factor *= 255;
|
||||
factor = 1./(factor * factor * factor);
|
||||
|
||||
#if CV_SSE2
|
||||
volatile bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
|
||||
__m128 v_factor = _mm_set1_ps((float)factor), v_m2 = _mm_set1_ps(-2.0f);
|
||||
#endif
|
||||
|
||||
Size size = src.size();
|
||||
int i, j;
|
||||
for( i = 0; i < size.height; i++ )
|
||||
@ -619,7 +624,26 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
|
||||
const float* d2ydata = (const float*)(D2y.data + i*D2y.step);
|
||||
const float* dxydata = (const float*)(Dxy.data + i*Dxy.step);
|
||||
|
||||
for( j = 0; j < size.width; j++ )
|
||||
j = 0;
|
||||
|
||||
#if CV_SSE2
|
||||
if (haveSSE2)
|
||||
{
|
||||
for( ; j <= size.width - 4; j += 4 )
|
||||
{
|
||||
__m128 v_dx = _mm_loadu_ps((const float *)(dxdata + j));
|
||||
__m128 v_dy = _mm_loadu_ps((const float *)(dydata + j));
|
||||
|
||||
__m128 v_s1 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dx), _mm_loadu_ps((const float *)(d2ydata + j)));
|
||||
__m128 v_s2 = _mm_mul_ps(_mm_mul_ps(v_dy, v_dy), _mm_loadu_ps((const float *)(d2xdata + j)));
|
||||
__m128 v_s3 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dy), _mm_loadu_ps((const float *)(dxydata + j)));
|
||||
v_s1 = _mm_mul_ps(v_factor, _mm_add_ps(v_s1, _mm_add_ps(v_s2, _mm_mul_ps(v_s3, v_m2))));
|
||||
_mm_storeu_ps(dstdata + j, v_s1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for( ; j < size.width; j++ )
|
||||
{
|
||||
float dx = dxdata[j];
|
||||
float dy = dydata[j];
|
||||
|
Loading…
x
Reference in New Issue
Block a user