use static assertation where suitable

This commit is contained in:
marina.kolpakova
2013-03-20 01:52:40 +04:00
parent 5c1c13e745
commit 4096b54560
7 changed files with 123 additions and 126 deletions

View File

@@ -100,61 +100,61 @@ void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bo
#if CV_SSE2
if( patternSize == 16 )
{
for(; j < img.cols - 16 - 3; j += 16, ptr += 16)
{
__m128i m0, m1;
__m128i v0 = _mm_loadu_si128((const __m128i*)ptr);
__m128i v1 = _mm_xor_si128(_mm_subs_epu8(v0, t), delta);
v0 = _mm_xor_si128(_mm_adds_epu8(v0, t), delta);
__m128i x0 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[0])), delta);
__m128i x1 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[quarterPatternSize])), delta);
__m128i x2 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[2*quarterPatternSize])), delta);
__m128i x3 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[3*quarterPatternSize])), delta);
m0 = _mm_and_si128(_mm_cmpgt_epi8(x0, v0), _mm_cmpgt_epi8(x1, v0));
m1 = _mm_and_si128(_mm_cmpgt_epi8(v1, x0), _mm_cmpgt_epi8(v1, x1));
m0 = _mm_or_si128(m0, _mm_and_si128(_mm_cmpgt_epi8(x1, v0), _mm_cmpgt_epi8(x2, v0)));
m1 = _mm_or_si128(m1, _mm_and_si128(_mm_cmpgt_epi8(v1, x1), _mm_cmpgt_epi8(v1, x2)));
m0 = _mm_or_si128(m0, _mm_and_si128(_mm_cmpgt_epi8(x2, v0), _mm_cmpgt_epi8(x3, v0)));
m1 = _mm_or_si128(m1, _mm_and_si128(_mm_cmpgt_epi8(v1, x2), _mm_cmpgt_epi8(v1, x3)));
m0 = _mm_or_si128(m0, _mm_and_si128(_mm_cmpgt_epi8(x3, v0), _mm_cmpgt_epi8(x0, v0)));
m1 = _mm_or_si128(m1, _mm_and_si128(_mm_cmpgt_epi8(v1, x3), _mm_cmpgt_epi8(v1, x0)));
m0 = _mm_or_si128(m0, m1);
int mask = _mm_movemask_epi8(m0);
if( mask == 0 )
continue;
if( (mask & 255) == 0 )
for(; j < img.cols - 16 - 3; j += 16, ptr += 16)
{
j -= 8;
ptr -= 8;
continue;
}
__m128i m0, m1;
__m128i v0 = _mm_loadu_si128((const __m128i*)ptr);
__m128i v1 = _mm_xor_si128(_mm_subs_epu8(v0, t), delta);
v0 = _mm_xor_si128(_mm_adds_epu8(v0, t), delta);
__m128i c0 = _mm_setzero_si128(), c1 = c0, max0 = c0, max1 = c0;
for( k = 0; k < N; k++ )
{
__m128i x = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(ptr + pixel[k])), delta);
m0 = _mm_cmpgt_epi8(x, v0);
m1 = _mm_cmpgt_epi8(v1, x);
c0 = _mm_and_si128(_mm_sub_epi8(c0, m0), m0);
c1 = _mm_and_si128(_mm_sub_epi8(c1, m1), m1);
max0 = _mm_max_epu8(max0, c0);
max1 = _mm_max_epu8(max1, c1);
}
max0 = _mm_max_epu8(max0, max1);
int m = _mm_movemask_epi8(_mm_cmpgt_epi8(max0, K16));
for( k = 0; m > 0 && k < 16; k++, m >>= 1 )
if(m & 1)
__m128i x0 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[0])), delta);
__m128i x1 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[quarterPatternSize])), delta);
__m128i x2 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[2*quarterPatternSize])), delta);
__m128i x3 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[3*quarterPatternSize])), delta);
m0 = _mm_and_si128(_mm_cmpgt_epi8(x0, v0), _mm_cmpgt_epi8(x1, v0));
m1 = _mm_and_si128(_mm_cmpgt_epi8(v1, x0), _mm_cmpgt_epi8(v1, x1));
m0 = _mm_or_si128(m0, _mm_and_si128(_mm_cmpgt_epi8(x1, v0), _mm_cmpgt_epi8(x2, v0)));
m1 = _mm_or_si128(m1, _mm_and_si128(_mm_cmpgt_epi8(v1, x1), _mm_cmpgt_epi8(v1, x2)));
m0 = _mm_or_si128(m0, _mm_and_si128(_mm_cmpgt_epi8(x2, v0), _mm_cmpgt_epi8(x3, v0)));
m1 = _mm_or_si128(m1, _mm_and_si128(_mm_cmpgt_epi8(v1, x2), _mm_cmpgt_epi8(v1, x3)));
m0 = _mm_or_si128(m0, _mm_and_si128(_mm_cmpgt_epi8(x3, v0), _mm_cmpgt_epi8(x0, v0)));
m1 = _mm_or_si128(m1, _mm_and_si128(_mm_cmpgt_epi8(v1, x3), _mm_cmpgt_epi8(v1, x0)));
m0 = _mm_or_si128(m0, m1);
int mask = _mm_movemask_epi8(m0);
if( mask == 0 )
continue;
if( (mask & 255) == 0 )
{
cornerpos[ncorners++] = j+k;
if(nonmax_suppression)
curr[j+k] = (uchar)cornerScore<patternSize>(ptr+k, pixel, threshold);
j -= 8;
ptr -= 8;
continue;
}
}
__m128i c0 = _mm_setzero_si128(), c1 = c0, max0 = c0, max1 = c0;
for( k = 0; k < N; k++ )
{
__m128i x = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(ptr + pixel[k])), delta);
m0 = _mm_cmpgt_epi8(x, v0);
m1 = _mm_cmpgt_epi8(v1, x);
c0 = _mm_and_si128(_mm_sub_epi8(c0, m0), m0);
c1 = _mm_and_si128(_mm_sub_epi8(c1, m1), m1);
max0 = _mm_max_epu8(max0, c0);
max1 = _mm_max_epu8(max1, c1);
}
max0 = _mm_max_epu8(max0, max1);
int m = _mm_movemask_epi8(_mm_cmpgt_epi8(max0, K16));
for( k = 0; m > 0 && k < 16; k++, m >>= 1 )
if(m & 1)
{
cornerpos[ncorners++] = j+k;
if(nonmax_suppression)
curr[j+k] = (uchar)cornerScore<patternSize>(ptr+k, pixel, threshold);
}
}
}
#endif
for( ; j < img.cols - 3; j++, ptr++ )