Add asm code for decoder cabac

This commit is contained in:
zhiliang wang 2014-11-26 16:44:12 +08:00
parent 9da19758cf
commit 92bc88eacb
5 changed files with 83 additions and 2 deletions

View File

@ -156,4 +156,22 @@ WELS_ASM_FUNC_BEGIN IdctResAddPred_neon
vst1.32 {d22[0]},[r2],r1
vst1.32 {d22[1]},[r2]
WELS_ASM_FUNC_END
/*
 * void WelsBlockZero16x16_neon(int16_t * block, int32_t stride);
 *
 * Stores zeros to 16 rows of 32 bytes each (16 int16_t per row).
 *   r0 = block  : address of the first row; advanced past each row written
 *   r1 = stride : doubled below and then used as the byte post-increment,
 *                 i.e. it is taken in 2-byte (int16_t) units
 * d0-d3 (q0/q1) hold the zero pattern; r0 and r1 are modified.
 */
WELS_ASM_FUNC_BEGIN WelsBlockZero16x16_neon
    vmov.i16    q0, #0
    vmov.i16    q1, #0
    lsl         r1, r1, #1                      /* row pitch: elements -> bytes */
.rept 16
    vst1.64     {d0, d1, d2, d3}, [r0], r1      /* zero one row, step to the next */
.endr
WELS_ASM_FUNC_END
/*
 * void WelsBlockZero8x8_neon(int16_t * block, int32_t stride);
 *
 * Stores zeros to 8 rows of 16 bytes each (8 int16_t per row).
 *   r0 = block  : address of the first row; advanced past each row written
 *   r1 = stride : doubled below and then used as the byte post-increment,
 *                 i.e. it is taken in 2-byte (int16_t) units
 * d0/d1 (q0) hold the zero pattern; r0 and r1 are modified.
 */
WELS_ASM_FUNC_BEGIN WelsBlockZero8x8_neon
    vmov.i16    q0, #0
    lsl         r1, r1, #1              /* row pitch: elements -> bytes */
.rept 8
    vst1.64     {d0, d1}, [r0], r1      /* zero one row, step to the next */
.endr
WELS_ASM_FUNC_END
#endif

View File

@ -158,4 +158,21 @@ WELS_ASM_AARCH64_FUNC_BEGIN IdctResAddPred_AArch64_neon
st1 {v1.s}[0],[x2],x1
st1 {v1.s}[1],[x2]
WELS_ASM_AARCH64_FUNC_END
/*
 * void WelsBlockZero16x16_AArch64_neon(int16_t * block, int32_t stride);
 *
 * Stores zeros to 16 rows of 32 bytes each (16 int16_t per row).
 *   x0 = block  : address of the first row; advanced past each row written
 *   w1 = stride : 32-bit signed row pitch, doubled below to obtain bytes
 * v0/v1 hold the zero pattern; x0 and x1 are modified.
 */
WELS_ASM_AARCH64_FUNC_BEGIN WelsBlockZero16x16_AArch64_neon
    eor     v0.16b, v0.16b, v0.16b
    eor     v1.16b, v1.16b, v1.16b
    /* stride is an int32_t, so only w1 is defined by the caller: the upper
       half of x1 is unspecified under AAPCS64. Sign-extend before x1 is
       used as a 64-bit post-index increment. */
    sxtw    x1, w1
    lsl     x1, x1, #1                          /* row pitch: elements -> bytes */
.rept 16
    st1     {v0.16b, v1.16b}, [x0], x1          /* zero one row, step to the next */
.endr
WELS_ASM_AARCH64_FUNC_END
/*
 * void WelsBlockZero8x8_AArch64_neon(int16_t * block, int32_t stride);
 *
 * Stores zeros to 8 rows of 16 bytes each (8 int16_t per row).
 *   x0 = block  : address of the first row; advanced past each row written
 *   w1 = stride : 32-bit signed row pitch, doubled below to obtain bytes
 * v0 holds the zero pattern; x0 and x1 are modified.
 */
WELS_ASM_AARCH64_FUNC_BEGIN WelsBlockZero8x8_AArch64_neon
    eor     v0.16b, v0.16b, v0.16b
    /* stride is an int32_t, so only w1 is defined by the caller: the upper
       half of x1 is unspecified under AAPCS64. Sign-extend before x1 is
       used as a 64-bit post-index increment. */
    sxtw    x1, w1
    lsl     x1, x1, #1                  /* row pitch: elements -> bytes */
.rept 8
    st1     {v0.16b}, [x0], x1          /* zero one row, step to the next */
.endr
WELS_ASM_AARCH64_FUNC_END
#endif

View File

@ -67,12 +67,21 @@ void WelsChromaDcIdct (int16_t* pBlock);
extern "C" {
#endif//__cplusplus
/* SSE2 block-zeroing routines, implemented in assembly.
 * block  : first row of the int16_t block to clear (16x16 or 8x8 elements).
 * stride : row pitch in int16_t elements; the assembly doubles it to get bytes.
 * NOTE(review): the SSE2 versions store with movdqa, so every row address must
 * be 16-byte aligned - confirm that all callers guarantee this. */
#if defined(X86_ASM)
void WelsBlockZero16x16_sse2(int16_t * block, int32_t stride);
void WelsBlockZero8x8_sse2(int16_t * block, int32_t stride);
#endif
/* ARM (32-bit) NEON routines; the block-zeroing pair takes the same
 * block/stride arguments as the SSE2 versions. */
#if defined(HAVE_NEON)
void SetNonZeroCount_neon (int8_t* pNonZeroCount);
void WelsBlockZero16x16_neon(int16_t * block, int32_t stride);
void WelsBlockZero8x8_neon(int16_t * block, int32_t stride);
#endif
/* AArch64 NEON routines; the block-zeroing pair takes the same
 * block/stride arguments as the SSE2 versions. */
#if defined(HAVE_NEON_AARCH64)
void SetNonZeroCount_AArch64_neon (int8_t* pNonZeroCount);
void WelsBlockZero16x16_AArch64_neon(int16_t * block, int32_t stride);
void WelsBlockZero8x8_AArch64_neon(int16_t * block, int32_t stride);
#endif
#ifdef __cplusplus
}

View File

@ -1644,15 +1644,25 @@ void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
// Install SIMD implementations of the block-zeroing function pointers.
// Each group is compiled in only when its backend macro is defined, and is
// applied only when the matching capability bit is set in iCpu.
// NOTE(review): presumably these replace defaults assigned earlier in this
// function (not visible in this hunk) - confirm.
#ifdef HAVE_NEON
// 32-bit ARM NEON
if (iCpu & WELS_CPU_NEON) {
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_neon;
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_neon;
}
#endif
#ifdef HAVE_NEON_AARCH64
// AArch64 NEON (tested with the same WELS_CPU_NEON flag as the 32-bit build)
if (iCpu & WELS_CPU_NEON) {
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_AArch64_neon;
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_AArch64_neon;
}
#endif
#if defined(X86_ASM)
// x86 SSE2
if (iCpu & WELS_CPU_SSE2) {
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_sse2;
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_sse2;
}
#endif
}
void SetNonZeroCount_c (int8_t* pNonZeroCount) {

View File

@ -113,3 +113,30 @@ WELS_EXTERN IdctResAddPred_mmx
emms
ret
;***********************************************************************
; void WelsBlockZero16x16_sse2(int16_t * block, int32_t stride);
;
; Stores zeros to 16 rows of 32 bytes each (16 int16_t per row).
;   r0 = block, r1 = stride once LOAD_2_PARA has run (presumably it maps
;        the two C arguments in order - see the macro definition).
;   The stride is sign-extended and doubled to get the row pitch in bytes.
; movdqa is an aligned store: every row address must be 16-byte aligned.
; Modifies r0, r1, xmm0 and flags.
;***********************************************************************
WELS_EXTERN WelsBlockZero16x16_sse2
    %assign push_num 0
    LOAD_2_PARA
    SIGN_EXTENSION r1, r1d
    pxor    xmm0, xmm0              ; zero pattern
    add     r1, r1                  ; row pitch: elements -> bytes
%rep 16
    movdqa  [r0], xmm0              ; low 16 bytes of the row
    movdqa  [r0 + 16], xmm0         ; high 16 bytes of the row
    add     r0, r1                  ; next row
%endrep
    ret
;***********************************************************************
; void WelsBlockZero8x8_sse2(int16_t * block, int32_t stride);
;
; Stores zeros to 8 rows of 16 bytes each (8 int16_t per row).
;   r0 = block, r1 = stride once LOAD_2_PARA has run (presumably it maps
;        the two C arguments in order - see the macro definition).
;   The stride is sign-extended and doubled to get the row pitch in bytes.
; movdqa is an aligned store: every row address must be 16-byte aligned.
; Modifies r0, r1, xmm0 and flags.
;***********************************************************************
WELS_EXTERN WelsBlockZero8x8_sse2
    %assign push_num 0
    LOAD_2_PARA
    SIGN_EXTENSION r1, r1d
    pxor    xmm0, xmm0              ; zero pattern
    add     r1, r1                  ; row pitch: elements -> bytes
%rep 8
    movdqa  [r0], xmm0              ; one full 16-byte row
    add     r0, r1                  ; next row
%endrep
    ret