Merge pull request #6740 from tomoaki0705:fixNeonCheckSmooth
This commit is contained in:
commit
e4cd24537e
@ -229,6 +229,8 @@ struct ColumnSum<int, uchar> :
|
|||||||
|
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
||||||
|
#elif CV_NEON
|
||||||
|
bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if( width != (int)sum.size() )
|
if( width != (int)sum.size() )
|
||||||
@ -256,8 +258,11 @@ struct ColumnSum<int, uchar> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
for( ; i <= width - 4; i+=4 )
|
for( ; i <= width - 4; i+=4 )
|
||||||
vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
|
vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
SUM[i] += Sp[i];
|
SUM[i] += Sp[i];
|
||||||
@ -303,6 +308,8 @@ struct ColumnSum<int, uchar> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
float32x4_t v_scale = vdupq_n_f32((float)_scale);
|
float32x4_t v_scale = vdupq_n_f32((float)_scale);
|
||||||
for( ; i <= width-8; i+=8 )
|
for( ; i <= width-8; i+=8 )
|
||||||
{
|
{
|
||||||
@ -318,6 +325,7 @@ struct ColumnSum<int, uchar> :
|
|||||||
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
||||||
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
{
|
{
|
||||||
@ -351,6 +359,8 @@ struct ColumnSum<int, uchar> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
for( ; i <= width-8; i+=8 )
|
for( ; i <= width-8; i+=8 )
|
||||||
{
|
{
|
||||||
int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
|
int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
|
||||||
@ -362,6 +372,7 @@ struct ColumnSum<int, uchar> :
|
|||||||
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
||||||
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
@ -404,6 +415,8 @@ struct ColumnSum<int, short> :
|
|||||||
|
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
||||||
|
#elif CV_NEON
|
||||||
|
bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if( width != (int)sum.size() )
|
if( width != (int)sum.size() )
|
||||||
@ -411,6 +424,7 @@ struct ColumnSum<int, short> :
|
|||||||
sum.resize(width);
|
sum.resize(width);
|
||||||
sumCount = 0;
|
sumCount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
SUM = &sum[0];
|
SUM = &sum[0];
|
||||||
if( sumCount == 0 )
|
if( sumCount == 0 )
|
||||||
{
|
{
|
||||||
@ -430,8 +444,11 @@ struct ColumnSum<int, short> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
for( ; i <= width - 4; i+=4 )
|
for( ; i <= width - 4; i+=4 )
|
||||||
vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
|
vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
SUM[i] += Sp[i];
|
SUM[i] += Sp[i];
|
||||||
@ -475,6 +492,8 @@ struct ColumnSum<int, short> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
float32x4_t v_scale = vdupq_n_f32((float)_scale);
|
float32x4_t v_scale = vdupq_n_f32((float)_scale);
|
||||||
for( ; i <= width-8; i+=8 )
|
for( ; i <= width-8; i+=8 )
|
||||||
{
|
{
|
||||||
@ -488,6 +507,7 @@ struct ColumnSum<int, short> :
|
|||||||
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
||||||
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
{
|
{
|
||||||
@ -520,6 +540,8 @@ struct ColumnSum<int, short> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
for( ; i <= width-8; i+=8 )
|
for( ; i <= width-8; i+=8 )
|
||||||
{
|
{
|
||||||
int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
|
int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
|
||||||
@ -530,6 +552,7 @@ struct ColumnSum<int, short> :
|
|||||||
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
||||||
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
@ -570,8 +593,11 @@ struct ColumnSum<int, ushort> :
|
|||||||
int* SUM;
|
int* SUM;
|
||||||
bool haveScale = scale != 1;
|
bool haveScale = scale != 1;
|
||||||
double _scale = scale;
|
double _scale = scale;
|
||||||
|
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
||||||
|
#elif CV_NEON
|
||||||
|
bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if( width != (int)sum.size() )
|
if( width != (int)sum.size() )
|
||||||
@ -579,6 +605,7 @@ struct ColumnSum<int, ushort> :
|
|||||||
sum.resize(width);
|
sum.resize(width);
|
||||||
sumCount = 0;
|
sumCount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
SUM = &sum[0];
|
SUM = &sum[0];
|
||||||
if( sumCount == 0 )
|
if( sumCount == 0 )
|
||||||
{
|
{
|
||||||
@ -590,7 +617,7 @@ struct ColumnSum<int, ushort> :
|
|||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
if(haveSSE2)
|
if(haveSSE2)
|
||||||
{
|
{
|
||||||
for( ; i < width-4; i+=4 )
|
for( ; i <= width-4; i+=4 )
|
||||||
{
|
{
|
||||||
__m128i _sum = _mm_loadu_si128((const __m128i*)(SUM+i));
|
__m128i _sum = _mm_loadu_si128((const __m128i*)(SUM+i));
|
||||||
__m128i _sp = _mm_loadu_si128((const __m128i*)(Sp+i));
|
__m128i _sp = _mm_loadu_si128((const __m128i*)(Sp+i));
|
||||||
@ -598,8 +625,11 @@ struct ColumnSum<int, ushort> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
for( ; i <= width - 4; i+=4 )
|
for( ; i <= width - 4; i+=4 )
|
||||||
vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
|
vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
SUM[i] += Sp[i];
|
SUM[i] += Sp[i];
|
||||||
@ -642,6 +672,8 @@ struct ColumnSum<int, ushort> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
float32x4_t v_scale = vdupq_n_f32((float)_scale);
|
float32x4_t v_scale = vdupq_n_f32((float)_scale);
|
||||||
for( ; i <= width-8; i+=8 )
|
for( ; i <= width-8; i+=8 )
|
||||||
{
|
{
|
||||||
@ -655,6 +687,7 @@ struct ColumnSum<int, ushort> :
|
|||||||
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
||||||
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
{
|
{
|
||||||
@ -686,6 +719,8 @@ struct ColumnSum<int, ushort> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
for( ; i <= width-8; i+=8 )
|
for( ; i <= width-8; i+=8 )
|
||||||
{
|
{
|
||||||
int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
|
int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
|
||||||
@ -696,6 +731,7 @@ struct ColumnSum<int, ushort> :
|
|||||||
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
||||||
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
@ -738,6 +774,8 @@ struct ColumnSum<int, int> :
|
|||||||
|
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
||||||
|
#elif CV_NEON
|
||||||
|
bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if( width != (int)sum.size() )
|
if( width != (int)sum.size() )
|
||||||
@ -745,6 +783,7 @@ struct ColumnSum<int, int> :
|
|||||||
sum.resize(width);
|
sum.resize(width);
|
||||||
sumCount = 0;
|
sumCount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
SUM = &sum[0];
|
SUM = &sum[0];
|
||||||
if( sumCount == 0 )
|
if( sumCount == 0 )
|
||||||
{
|
{
|
||||||
@ -764,8 +803,11 @@ struct ColumnSum<int, int> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
for( ; i <= width - 4; i+=4 )
|
for( ; i <= width - 4; i+=4 )
|
||||||
vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
|
vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
SUM[i] += Sp[i];
|
SUM[i] += Sp[i];
|
||||||
@ -803,6 +845,8 @@ struct ColumnSum<int, int> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
float32x4_t v_scale = vdupq_n_f32((float)_scale);
|
float32x4_t v_scale = vdupq_n_f32((float)_scale);
|
||||||
for( ; i <= width-4; i+=4 )
|
for( ; i <= width-4; i+=4 )
|
||||||
{
|
{
|
||||||
@ -813,6 +857,7 @@ struct ColumnSum<int, int> :
|
|||||||
|
|
||||||
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
{
|
{
|
||||||
@ -838,6 +883,8 @@ struct ColumnSum<int, int> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
for( ; i <= width-4; i+=4 )
|
for( ; i <= width-4; i+=4 )
|
||||||
{
|
{
|
||||||
int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
|
int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
|
||||||
@ -845,6 +892,7 @@ struct ColumnSum<int, int> :
|
|||||||
vst1q_s32(D + i, v_s0);
|
vst1q_s32(D + i, v_s0);
|
||||||
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
@ -888,6 +936,8 @@ struct ColumnSum<int, float> :
|
|||||||
|
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
|
||||||
|
#elif CV_NEON
|
||||||
|
bool haveNEON = checkHardwareSupport(CV_CPU_NEON);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if( width != (int)sum.size() )
|
if( width != (int)sum.size() )
|
||||||
@ -899,17 +949,15 @@ struct ColumnSum<int, float> :
|
|||||||
SUM = &sum[0];
|
SUM = &sum[0];
|
||||||
if( sumCount == 0 )
|
if( sumCount == 0 )
|
||||||
{
|
{
|
||||||
memset((void *)SUM, 0, sizeof(int) * width);
|
memset((void*)SUM, 0, width*sizeof(int));
|
||||||
|
|
||||||
for( ; sumCount < ksize - 1; sumCount++, src++ )
|
for( ; sumCount < ksize - 1; sumCount++, src++ )
|
||||||
{
|
{
|
||||||
const int* Sp = (const int*)src[0];
|
const int* Sp = (const int*)src[0];
|
||||||
i = 0;
|
i = 0;
|
||||||
|
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
if(haveSSE2)
|
if(haveSSE2)
|
||||||
{
|
{
|
||||||
for( ; i < width-4; i+=4 )
|
for( ; i <= width-4; i+=4 )
|
||||||
{
|
{
|
||||||
__m128i _sum = _mm_loadu_si128((const __m128i*)(SUM+i));
|
__m128i _sum = _mm_loadu_si128((const __m128i*)(SUM+i));
|
||||||
__m128i _sp = _mm_loadu_si128((const __m128i*)(Sp+i));
|
__m128i _sp = _mm_loadu_si128((const __m128i*)(Sp+i));
|
||||||
@ -917,8 +965,11 @@ struct ColumnSum<int, float> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
for( ; i <= width - 4; i+=4 )
|
for( ; i <= width - 4; i+=4 )
|
||||||
vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
|
vst1q_s32(SUM + i, vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i)));
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
@ -956,6 +1007,8 @@ struct ColumnSum<int, float> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
float32x4_t v_scale = vdupq_n_f32((float)_scale);
|
float32x4_t v_scale = vdupq_n_f32((float)_scale);
|
||||||
for( ; i <= width-8; i+=8 )
|
for( ; i <= width-8; i+=8 )
|
||||||
{
|
{
|
||||||
@ -968,6 +1021,7 @@ struct ColumnSum<int, float> :
|
|||||||
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
||||||
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
@ -995,6 +1049,8 @@ struct ColumnSum<int, float> :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif CV_NEON
|
#elif CV_NEON
|
||||||
|
if(haveNEON)
|
||||||
|
{
|
||||||
for( ; i <= width-8; i+=8 )
|
for( ; i <= width-8; i+=8 )
|
||||||
{
|
{
|
||||||
int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
|
int32x4_t v_s0 = vaddq_s32(vld1q_s32(SUM + i), vld1q_s32(Sp + i));
|
||||||
@ -1006,6 +1062,7 @@ struct ColumnSum<int, float> :
|
|||||||
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
vst1q_s32(SUM + i, vsubq_s32(v_s0, vld1q_s32(Sm + i)));
|
||||||
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
vst1q_s32(SUM + i + 4, vsubq_s32(v_s01, vld1q_s32(Sm + i + 4)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for( ; i < width; i++ )
|
for( ; i < width; i++ )
|
||||||
|
Loading…
x
Reference in New Issue
Block a user