Add asm code for NoneZeroCount and refine related code
This commit is contained in:
parent
7d5e88ffda
commit
01b74ea7c1
@ -834,17 +834,12 @@ WELS_ASM_FUNC_END
|
|||||||
|
|
||||||
|
|
||||||
WELS_ASM_FUNC_BEGIN WelsNonZeroCount_neon
|
WELS_ASM_FUNC_BEGIN WelsNonZeroCount_neon
|
||||||
|
mov r1, #1
|
||||||
vld1.64 {d0-d2}, [r0]
|
vdup.8 q2, r1
|
||||||
|
vld1.64 {d0,d1,d2}, [r0]
|
||||||
vceq.s8 q0, q0, #0
|
vmin.s8 q0, q0, q2
|
||||||
vceq.s8 d2, d2, #0
|
vmin.s8 d2, d2, d4
|
||||||
vmvn q0, q0
|
vst1.64 {d0,d1,d2}, [r0]
|
||||||
vmvn d2, d2
|
|
||||||
vabs.s8 q0, q0
|
|
||||||
vabs.s8 d2, d2
|
|
||||||
|
|
||||||
vst1.64 {d0-d2}, [r0]
|
|
||||||
WELS_ASM_FUNC_END
|
WELS_ASM_FUNC_END
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
|
@ -553,16 +553,12 @@ bs_mv_check_jump1:
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
WELS_ASM_AARCH64_FUNC_BEGIN WelsNonZeroCount_AArch64_neon
|
WELS_ASM_AARCH64_FUNC_BEGIN WelsNonZeroCount_AArch64_neon
|
||||||
|
mov w1, #1
|
||||||
|
dup v3.8b, w1
|
||||||
ld1 {v0.8b, v1.8b, v2.8b}, [x0]
|
ld1 {v0.8b, v1.8b, v2.8b}, [x0]
|
||||||
ins v0.d[1], v1.d[0]
|
umin v0.8b, v0.8b, v3.8b
|
||||||
uzp1 v0.2d, v0.2d, v1.2d
|
umin v1.8b, v1.8b, v3.8b
|
||||||
cmeq v0.16b, v0.16b, #0
|
umin v2.8b, v2.8b, v3.8b
|
||||||
cmeq v2.8b, v2.8b, #0
|
|
||||||
mvn v0.16b, v0.16b
|
|
||||||
mvn v2.8b, v2.8b
|
|
||||||
abs v0.16b, v0.16b
|
|
||||||
abs v2.8b, v2.8b
|
|
||||||
ins v1.d[0], v0.d[1]
|
|
||||||
st1 {v0.8b, v1.8b, v2.8b}, [x0]
|
st1 {v0.8b, v1.8b, v2.8b}, [x0]
|
||||||
WELS_ASM_AARCH64_FUNC_END
|
WELS_ASM_AARCH64_FUNC_END
|
||||||
|
|
||||||
|
@ -15,6 +15,8 @@ void DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int
|
|||||||
int8_t* pTc);
|
int8_t* pTc);
|
||||||
void DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
void DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
||||||
|
|
||||||
|
void WelsNonZeroCount_c (int8_t* pNonZeroCount);
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif//__cplusplus
|
#endif//__cplusplus
|
||||||
@ -32,6 +34,7 @@ void DeblockChromaLt4V_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
|
|||||||
void DeblockChromaEq4H_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
void DeblockChromaEq4H_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
||||||
void DeblockChromaLt4H_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
|
void DeblockChromaLt4H_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
|
||||||
int8_t* pTC);
|
int8_t* pTC);
|
||||||
|
void WelsNonZeroCount_sse2 (int8_t* pNonZeroCount);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_NEON)
|
#if defined(HAVE_NEON)
|
||||||
@ -48,6 +51,7 @@ void DeblockChromaEq4V_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
|
|||||||
void DeblockChromaLt4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
|
void DeblockChromaLt4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
|
||||||
int8_t* pTC);
|
int8_t* pTC);
|
||||||
void DeblockChromaEq4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
void DeblockChromaEq4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
||||||
|
void WelsNonZeroCount_neon (int8_t* pNonZeroCount);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_NEON_AARCH64)
|
#if defined(HAVE_NEON_AARCH64)
|
||||||
@ -61,6 +65,7 @@ void DeblockChromaEq4V_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t i
|
|||||||
void DeblockChromaLt4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
|
void DeblockChromaLt4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
|
||||||
int8_t* pTC);
|
int8_t* pTC);
|
||||||
void DeblockChromaEq4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
void DeblockChromaEq4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
||||||
|
void WelsNonZeroCount_AArch64_neon (int8_t* pNonZeroCount);
|
||||||
#endif
|
#endif
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
}
|
}
|
||||||
|
@ -180,6 +180,13 @@ void DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int
|
|||||||
DeblockChromaEq4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta);
|
DeblockChromaEq4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void WelsNonZeroCount_c (int8_t* pNonZeroCount) {
|
||||||
|
int32_t i;
|
||||||
|
for (i = 0; i < 24; i++) {
|
||||||
|
pNonZeroCount[i] = !!pNonZeroCount[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef X86_ASM
|
#ifdef X86_ASM
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
|
void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
|
||||||
|
@ -5276,3 +5276,14 @@ WELS_EXTERN DeblockLumaTransposeV2H_sse2
|
|||||||
pop r3
|
pop r3
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
WELS_EXTERN WelsNonZeroCount_sse2
|
||||||
|
%assign push_num 0
|
||||||
|
LOAD_1_PARA
|
||||||
|
movdqu xmm0, [r0]
|
||||||
|
movq xmm1, [r0+16]
|
||||||
|
WELS_DB1 xmm2
|
||||||
|
pminub xmm0, xmm2
|
||||||
|
pminub xmm1, xmm2
|
||||||
|
movdqu [r0], xmm0
|
||||||
|
movq [r0+16], xmm1
|
||||||
|
ret
|
||||||
|
@ -99,18 +99,6 @@
|
|||||||
// }
|
// }
|
||||||
.endm
|
.endm
|
||||||
#endif
|
#endif
|
||||||
// r0 int8_t* non_zero_count,
|
|
||||||
WELS_ASM_FUNC_BEGIN SetNonZeroCount_neon
|
|
||||||
vld1.64 {d0-d2}, [r0]
|
|
||||||
vceq.s8 q0, q0, #0
|
|
||||||
vceq.s8 d2, d2, #0
|
|
||||||
vmvn q0, q0
|
|
||||||
vmvn d2, d2
|
|
||||||
vabs.s8 q0, q0
|
|
||||||
vabs.s8 d2, d2
|
|
||||||
vst1.64 {d0-d2}, [r0]
|
|
||||||
WELS_ASM_FUNC_END
|
|
||||||
|
|
||||||
|
|
||||||
// uint8_t *pred, const int32_t stride, int16_t *rs
|
// uint8_t *pred, const int32_t stride, int16_t *rs
|
||||||
WELS_ASM_FUNC_BEGIN IdctResAddPred_neon
|
WELS_ASM_FUNC_BEGIN IdctResAddPred_neon
|
||||||
|
@ -100,20 +100,6 @@
|
|||||||
// }
|
// }
|
||||||
.endm
|
.endm
|
||||||
#endif
|
#endif
|
||||||
// x0 int8_t* non_zero_count,
|
|
||||||
WELS_ASM_AARCH64_FUNC_BEGIN SetNonZeroCount_AArch64_neon
|
|
||||||
mov x1, x0
|
|
||||||
ld1 {v0.16b}, [x1], #16
|
|
||||||
ld1 {v1.8b}, [x1]
|
|
||||||
cmeq v0.16b, v0.16b, #0
|
|
||||||
cmeq v1.8b, v1.8b, #0
|
|
||||||
mvn v0.16b, v0.16b
|
|
||||||
mvn v1.8b, v1.8b
|
|
||||||
abs v0.16b, v0.16b
|
|
||||||
abs v1.8b, v1.8b
|
|
||||||
st1 {v0.16b}, [x0], #16
|
|
||||||
st1 {v1.8b}, [x0]
|
|
||||||
WELS_ASM_AARCH64_FUNC_END
|
|
||||||
|
|
||||||
// uint8_t *pred, const int32_t stride, int16_t *rs
|
// uint8_t *pred, const int32_t stride, int16_t *rs
|
||||||
WELS_ASM_AARCH64_FUNC_BEGIN IdctResAddPred_AArch64_neon
|
WELS_ASM_AARCH64_FUNC_BEGIN IdctResAddPred_AArch64_neon
|
||||||
|
@ -73,13 +73,11 @@ void WelsBlockZero8x8_sse2(int16_t * block, int32_t stride);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_NEON)
|
#if defined(HAVE_NEON)
|
||||||
void SetNonZeroCount_neon (int8_t* pNonZeroCount);
|
|
||||||
void WelsBlockZero16x16_neon(int16_t * block, int32_t stride);
|
void WelsBlockZero16x16_neon(int16_t * block, int32_t stride);
|
||||||
void WelsBlockZero8x8_neon(int16_t * block, int32_t stride);
|
void WelsBlockZero8x8_neon(int16_t * block, int32_t stride);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_NEON_AARCH64)
|
#if defined(HAVE_NEON_AARCH64)
|
||||||
void SetNonZeroCount_AArch64_neon (int8_t* pNonZeroCount);
|
|
||||||
void WelsBlockZero16x16_AArch64_neon(int16_t * block, int32_t stride);
|
void WelsBlockZero16x16_AArch64_neon(int16_t * block, int32_t stride);
|
||||||
void WelsBlockZero8x8_AArch64_neon(int16_t * block, int32_t stride);
|
void WelsBlockZero8x8_AArch64_neon(int16_t * block, int32_t stride);
|
||||||
#endif
|
#endif
|
||||||
@ -87,8 +85,6 @@ void WelsBlockZero8x8_AArch64_neon(int16_t * block, int32_t stride);
|
|||||||
}
|
}
|
||||||
#endif//__cplusplus
|
#endif//__cplusplus
|
||||||
|
|
||||||
void SetNonZeroCount_c (int8_t* pNonZeroCount);
|
|
||||||
|
|
||||||
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu);
|
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu);
|
||||||
void WelsBlockZero16x16_c(int16_t * block, int32_t stride);
|
void WelsBlockZero16x16_c(int16_t * block, int32_t stride);
|
||||||
void WelsBlockZero8x8_c(int16_t * block, int32_t stride);
|
void WelsBlockZero8x8_c(int16_t * block, int32_t stride);
|
||||||
|
@ -1690,25 +1690,13 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin
|
|||||||
}
|
}
|
||||||
|
|
||||||
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
|
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
|
||||||
pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_c;
|
pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_c;
|
||||||
|
|
||||||
#ifdef HAVE_NEON
|
|
||||||
if (iCpu & WELS_CPU_NEON) {
|
|
||||||
pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_neon;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_NEON_AARCH64
|
|
||||||
if (iCpu & WELS_CPU_NEON) {
|
|
||||||
pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_AArch64_neon;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_c;
|
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_c;
|
||||||
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_c;
|
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_c;
|
||||||
//TO DO add neon and X86
|
|
||||||
#ifdef HAVE_NEON
|
#ifdef HAVE_NEON
|
||||||
if (iCpu & WELS_CPU_NEON) {
|
if (iCpu & WELS_CPU_NEON) {
|
||||||
|
pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_neon;
|
||||||
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_neon;
|
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_neon;
|
||||||
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_neon;
|
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_neon;
|
||||||
}
|
}
|
||||||
@ -1716,6 +1704,7 @@ void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
|
|||||||
|
|
||||||
#ifdef HAVE_NEON_AARCH64
|
#ifdef HAVE_NEON_AARCH64
|
||||||
if (iCpu & WELS_CPU_NEON) {
|
if (iCpu & WELS_CPU_NEON) {
|
||||||
|
pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_AArch64_neon;
|
||||||
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_AArch64_neon;
|
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_AArch64_neon;
|
||||||
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_AArch64_neon;
|
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_AArch64_neon;
|
||||||
}
|
}
|
||||||
@ -1723,6 +1712,7 @@ void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
|
|||||||
|
|
||||||
#if defined(X86_ASM)
|
#if defined(X86_ASM)
|
||||||
if (iCpu & WELS_CPU_SSE2) {
|
if (iCpu & WELS_CPU_SSE2) {
|
||||||
|
pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_sse2;
|
||||||
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_sse2;
|
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_sse2;
|
||||||
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_sse2;
|
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_sse2;
|
||||||
}
|
}
|
||||||
@ -1730,14 +1720,6 @@ void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetNonZeroCount_c (int8_t* pNonZeroCount) {
|
|
||||||
int32_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < 24; i++) {
|
|
||||||
pNonZeroCount[i] = !!pNonZeroCount[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void WelsBlockInit (int16_t* pBlock, int iW, int iH, int iStride, uint8_t uiVal) {
|
void WelsBlockInit (int16_t* pBlock, int iW, int iH, int iStride, uint8_t uiVal) {
|
||||||
int32_t i;
|
int32_t i;
|
||||||
int16_t* pDst = pBlock;
|
int16_t* pDst = pBlock;
|
||||||
|
@ -65,12 +65,10 @@ typedef struct TagDeblockingFilter {
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif//__cplusplus
|
#endif//__cplusplus
|
||||||
#if defined(HAVE_NEON)
|
#if defined(HAVE_NEON)
|
||||||
void WelsNonZeroCount_neon (int8_t* pNonZeroCount);
|
|
||||||
void DeblockingBSCalcEnc_neon (int8_t* pNzc, SMVUnitXY* pMv, int32_t iBoundryFlag, int32_t iMbStride,
|
void DeblockingBSCalcEnc_neon (int8_t* pNzc, SMVUnitXY* pMv, int32_t iBoundryFlag, int32_t iMbStride,
|
||||||
uint8_t (*pBS)[4][4]);
|
uint8_t (*pBS)[4][4]);
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAVE_NEON_AARCH64)
|
#if defined(HAVE_NEON_AARCH64)
|
||||||
void WelsNonZeroCount_AArch64_neon (int8_t* pNonZeroCount);
|
|
||||||
void DeblockingBSCalcEnc_AArch64_neon (int8_t* pNzc, SMVUnitXY* pMv, int32_t iBoundryFlag, int32_t iMbStride,
|
void DeblockingBSCalcEnc_AArch64_neon (int8_t* pNzc, SMVUnitXY* pMv, int32_t iBoundryFlag, int32_t iMbStride,
|
||||||
uint8_t (*pBS)[4][4]);
|
uint8_t (*pBS)[4][4]);
|
||||||
#endif
|
#endif
|
||||||
@ -79,7 +77,6 @@ void DeblockingBSCalcEnc_AArch64_neon (int8_t* pNzc, SMVUnitXY* pMv, int32_t iBo
|
|||||||
#endif//__cplusplus
|
#endif//__cplusplus
|
||||||
void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu);
|
void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu);
|
||||||
|
|
||||||
void WelsNonZeroCount_c (int8_t* pNonZeroCount);
|
|
||||||
void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu);
|
void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu);
|
||||||
|
|
||||||
void PerformDeblockingFilter (sWelsEncCtx* pEnc);
|
void PerformDeblockingFilter (sWelsEncCtx* pEnc);
|
||||||
|
@ -774,13 +774,6 @@ void PerformDeblockingFilter (sWelsEncCtx* pEnc) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void WelsNonZeroCount_c (int8_t* pNonZeroCount) {
|
|
||||||
int32_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < 24; i++) {
|
|
||||||
pNonZeroCount[i] = !!pNonZeroCount[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu) {
|
void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu) {
|
||||||
*pfSetNZCZero = WelsNonZeroCount_c;
|
*pfSetNZCZero = WelsNonZeroCount_c;
|
||||||
#ifdef HAVE_NEON
|
#ifdef HAVE_NEON
|
||||||
@ -793,6 +786,11 @@ void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu)
|
|||||||
*pfSetNZCZero = WelsNonZeroCount_AArch64_neon;
|
*pfSetNZCZero = WelsNonZeroCount_AArch64_neon;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(X86_ASM)
|
||||||
|
if (iCpu & WELS_CPU_SSE2) {
|
||||||
|
*pfSetNZCZero = WelsNonZeroCount_sse2;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
|
void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include "macros.h"
|
#include "macros.h"
|
||||||
#include "decode_mb_aux.h"
|
#include "decode_mb_aux.h"
|
||||||
#include "../../codec/decoder/core/src/decode_slice.cpp"
|
#include "deblocking.h"
|
||||||
using namespace WelsDec;
|
using namespace WelsDec;
|
||||||
void IdctResAddPred_ref (uint8_t* pPred, const int32_t kiStride, int16_t* pRs) {
|
void IdctResAddPred_ref (uint8_t* pPred, const int32_t kiStride, int16_t* pRs) {
|
||||||
int16_t iSrc[16];
|
int16_t iSrc[16];
|
||||||
@ -98,7 +98,7 @@ TEST(DecoderDecodeMbAux, method) \
|
|||||||
{\
|
{\
|
||||||
int8_t iNonZeroCount[2][24];\
|
int8_t iNonZeroCount[2][24];\
|
||||||
for(int32_t i = 0; i < 24; i++) {\
|
for(int32_t i = 0; i < 24; i++) {\
|
||||||
iNonZeroCount[0][i] = iNonZeroCount[1][i] = (rand() % 256)-128;\
|
iNonZeroCount[0][i] = iNonZeroCount[1][i] = (rand() % 25);\
|
||||||
}\
|
}\
|
||||||
method(iNonZeroCount[0]);\
|
method(iNonZeroCount[0]);\
|
||||||
SetNonZeroCount_ref(iNonZeroCount[1]);\
|
SetNonZeroCount_ref(iNonZeroCount[1]);\
|
||||||
@ -106,7 +106,7 @@ TEST(DecoderDecodeMbAux, method) \
|
|||||||
ASSERT_EQ (iNonZeroCount[0][i], iNonZeroCount[1][i]);\
|
ASSERT_EQ (iNonZeroCount[0][i], iNonZeroCount[1][i]);\
|
||||||
}\
|
}\
|
||||||
for(int32_t i =0; i<24; i++) {\
|
for(int32_t i =0; i<24; i++) {\
|
||||||
iNonZeroCount[0][i] = iNonZeroCount[1][i] = -128;\
|
iNonZeroCount[0][i] = iNonZeroCount[1][i] = 0;\
|
||||||
}\
|
}\
|
||||||
method(iNonZeroCount[0]);\
|
method(iNonZeroCount[0]);\
|
||||||
SetNonZeroCount_ref(iNonZeroCount[1]);\
|
SetNonZeroCount_ref(iNonZeroCount[1]);\
|
||||||
@ -114,7 +114,7 @@ TEST(DecoderDecodeMbAux, method) \
|
|||||||
ASSERT_EQ (iNonZeroCount[0][i], iNonZeroCount[1][i]);\
|
ASSERT_EQ (iNonZeroCount[0][i], iNonZeroCount[1][i]);\
|
||||||
}\
|
}\
|
||||||
for(int32_t i =0; i<24; i++) {\
|
for(int32_t i =0; i<24; i++) {\
|
||||||
iNonZeroCount[0][i] = iNonZeroCount[1][i] = 127;\
|
iNonZeroCount[0][i] = iNonZeroCount[1][i] = 16;\
|
||||||
}\
|
}\
|
||||||
method(iNonZeroCount[0]);\
|
method(iNonZeroCount[0]);\
|
||||||
SetNonZeroCount_ref(iNonZeroCount[1]);\
|
SetNonZeroCount_ref(iNonZeroCount[1]);\
|
||||||
@ -123,12 +123,16 @@ TEST(DecoderDecodeMbAux, method) \
|
|||||||
}\
|
}\
|
||||||
}
|
}
|
||||||
|
|
||||||
GENERATE_SETNONZEROCOUNT (SetNonZeroCount_c)
|
GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_c)
|
||||||
|
|
||||||
|
#if defined(X86_ASM)
|
||||||
|
GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_sse2)
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_NEON)
|
#if defined(HAVE_NEON)
|
||||||
GENERATE_SETNONZEROCOUNT (SetNonZeroCount_neon)
|
GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_neon)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_NEON_AARCH64)
|
#if defined(HAVE_NEON_AARCH64)
|
||||||
GENERATE_SETNONZEROCOUNT (SetNonZeroCount_AArch64_neon)
|
GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_AArch64_neon)
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user