SSE2 optimization of cv::preCornerDetect
This commit is contained in:
parent
2d81595ed4
commit
654bdde8ed
@@ -608,6 +608,11 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
|
|||||||
factor *= 255;
|
factor *= 255;
|
||||||
factor = 1./(factor * factor * factor);
|
factor = 1./(factor * factor * factor);
|
||||||
|
|
||||||
|
#if CV_SSE2
|
||||||
|
volatile bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
|
||||||
|
__m128 v_factor = _mm_set1_ps((float)factor), v_m2 = _mm_set1_ps(-2.0f);
|
||||||
|
#endif
|
||||||
|
|
||||||
Size size = src.size();
|
Size size = src.size();
|
||||||
int i, j;
|
int i, j;
|
||||||
for( i = 0; i < size.height; i++ )
|
for( i = 0; i < size.height; i++ )
|
||||||
@@ -619,7 +624,26 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
|
|||||||
const float* d2ydata = (const float*)(D2y.data + i*D2y.step);
|
const float* d2ydata = (const float*)(D2y.data + i*D2y.step);
|
||||||
const float* dxydata = (const float*)(Dxy.data + i*Dxy.step);
|
const float* dxydata = (const float*)(Dxy.data + i*Dxy.step);
|
||||||
|
|
||||||
for( j = 0; j < size.width; j++ )
|
j = 0;
|
||||||
|
|
||||||
|
#if CV_SSE2
|
||||||
|
if (haveSSE2)
|
||||||
|
{
|
||||||
|
for( ; j <= size.width - 4; j += 4 )
|
||||||
|
{
|
||||||
|
__m128 v_dx = _mm_loadu_ps((const float *)(dxdata + j));
|
||||||
|
__m128 v_dy = _mm_loadu_ps((const float *)(dydata + j));
|
||||||
|
|
||||||
|
__m128 v_s1 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dx), _mm_loadu_ps((const float *)(d2ydata + j)));
|
||||||
|
__m128 v_s2 = _mm_mul_ps(_mm_mul_ps(v_dy, v_dy), _mm_loadu_ps((const float *)(d2xdata + j)));
|
||||||
|
__m128 v_s3 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dy), _mm_loadu_ps((const float *)(dxydata + j)));
|
||||||
|
v_s1 = _mm_mul_ps(v_factor, _mm_add_ps(v_s1, _mm_add_ps(v_s2, _mm_mul_ps(v_s3, v_m2))));
|
||||||
|
_mm_storeu_ps(dstdata + j, v_s1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for( ; j < size.width; j++ )
|
||||||
{
|
{
|
||||||
float dx = dxdata[j];
|
float dx = dxdata[j];
|
||||||
float dy = dydata[j];
|
float dy = dydata[j];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user