Add asm code for decoder cabac
This commit is contained in:
parent
9da19758cf
commit
92bc88eacb
@ -156,4 +156,22 @@ WELS_ASM_FUNC_BEGIN IdctResAddPred_neon
|
||||
vst1.32 {d22[0]},[r2],r1
|
||||
vst1.32 {d22[1]},[r2]
|
||||
WELS_ASM_FUNC_END
|
||||
|
||||
|
||||
/*
 * void WelsBlockZero16x16_neon(int16_t * block, int32_t stride);
 *
 * Writes zeros over 16 rows of 32 bytes (16 int16_t values per row).
 *   r0 = block  : address of the first row; advanced by one row per store
 *   r1 = stride : row pitch counted in 2-byte (int16_t) units; it is
 *                 doubled below to obtain the byte step between rows
 * Registers written: r0, r1, q0, q1.
 */
WELS_ASM_FUNC_BEGIN WelsBlockZero16x16_neon
    lsl     r1, r1, #1                  /* r1 = row pitch in bytes */
    veor    q0, q0, q0                  /* q0 = 0 */
    veor    q1, q1, q1                  /* q1 = 0 */
.rept 16
    vst1.64 {d0-d3}, [r0], r1           /* clear one 32-byte row, step to the next */
.endr
WELS_ASM_FUNC_END
|
||||
|
||||
/*
 * void WelsBlockZero8x8_neon(int16_t * block, int32_t stride);
 *
 * Writes zeros over 8 rows of 16 bytes (8 int16_t values per row).
 *   r0 = block  : address of the first row; advanced by one row per store
 *   r1 = stride : row pitch counted in 2-byte (int16_t) units; it is
 *                 doubled below to obtain the byte step between rows
 * Registers written: r0, r1, q0.
 */
WELS_ASM_FUNC_BEGIN WelsBlockZero8x8_neon
    lsl     r1, r1, #1                  /* r1 = row pitch in bytes */
    veor    q0, q0, q0                  /* q0 = 0 */
.rept 8
    vst1.64 {d0, d1}, [r0], r1          /* clear one 16-byte row, step to the next */
.endr
WELS_ASM_FUNC_END
|
||||
#endif
|
||||
|
@ -158,4 +158,21 @@ WELS_ASM_AARCH64_FUNC_BEGIN IdctResAddPred_AArch64_neon
|
||||
st1 {v1.s}[0],[x2],x1
|
||||
st1 {v1.s}[1],[x2]
|
||||
WELS_ASM_AARCH64_FUNC_END
|
||||
|
||||
// void WelsBlockZero16x16_AArch64_neon(int16_t * block, int32_t stride);
//
// Writes zeros over 16 rows of 32 bytes (16 int16_t values per row).
//   x0 = block  : address of the first row; advanced by one row per store
//   w1 = stride : row pitch counted in 2-byte (int16_t) units
// Registers written: x0, x1, v0, v1.
WELS_ASM_AARCH64_FUNC_BEGIN WelsBlockZero16x16_AArch64_neon
    eor     v0.16b, v0.16b, v0.16b      // v0 = 0
    eor     v1.16b, v1.16b, v1.16b      // v1 = 0
    // stride arrives as a 32-bit int in w1; AAPCS64 leaves bits 63:32 of x1
    // unspecified, so widen it before it is used as a 64-bit post-increment.
    sxtw    x1, w1
    lsl     x1, x1, #1                  // x1 = row pitch in bytes
.rept 16
    st1     {v0.16b, v1.16b}, [x0], x1  // clear one 32-byte row, step to the next
.endr
WELS_ASM_AARCH64_FUNC_END
|
||||
|
||||
// void WelsBlockZero8x8_AArch64_neon(int16_t * block, int32_t stride);
//
// Writes zeros over 8 rows of 16 bytes (8 int16_t values per row).
//   x0 = block  : address of the first row; advanced by one row per store
//   w1 = stride : row pitch counted in 2-byte (int16_t) units
// Registers written: x0, x1, v0.
WELS_ASM_AARCH64_FUNC_BEGIN WelsBlockZero8x8_AArch64_neon
    eor     v0.16b, v0.16b, v0.16b      // v0 = 0
    // stride arrives as a 32-bit int in w1; AAPCS64 leaves bits 63:32 of x1
    // unspecified, so widen it before it is used as a 64-bit post-increment.
    sxtw    x1, w1
    lsl     x1, x1, #1                  // x1 = row pitch in bytes
.rept 8
    st1     {v0.16b}, [x0], x1          // clear one 16-byte row, step to the next
.endr
WELS_ASM_AARCH64_FUNC_END
|
||||
#endif
|
||||
|
@ -67,12 +67,21 @@ void WelsChromaDcIdct (int16_t* pBlock);
|
||||
extern "C" {
|
||||
#endif//__cplusplus
|
||||
|
||||
// Assembly implementations, declared per target. The WelsBlockZero* routines
// take the address of the first row of an int16_t block and a row stride.

#ifdef X86_ASM
// x86 SSE2 versions.
void WelsBlockZero16x16_sse2 (int16_t* block, int32_t stride);
void WelsBlockZero8x8_sse2 (int16_t* block, int32_t stride);
#endif // X86_ASM

#ifdef HAVE_NEON
// 32-bit ARM NEON versions.
void SetNonZeroCount_neon (int8_t* pNonZeroCount);
void WelsBlockZero16x16_neon (int16_t* block, int32_t stride);
void WelsBlockZero8x8_neon (int16_t* block, int32_t stride);
#endif // HAVE_NEON

#ifdef HAVE_NEON_AARCH64
// AArch64 NEON versions.
void SetNonZeroCount_AArch64_neon (int8_t* pNonZeroCount);
void WelsBlockZero16x16_AArch64_neon (int16_t* block, int32_t stride);
void WelsBlockZero8x8_AArch64_neon (int16_t* block, int32_t stride);
#endif // HAVE_NEON_AARCH64
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -1644,15 +1644,25 @@ void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
|
||||
//TO DO add neon and X86
|
||||
#ifdef HAVE_NEON
|
||||
if (iCpu & WELS_CPU_NEON) {
|
||||
|
||||
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_neon;
|
||||
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_neon;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_NEON_AARCH64
|
||||
if (iCpu & WELS_CPU_NEON) {
|
||||
|
||||
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_AArch64_neon;
|
||||
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_AArch64_neon;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(X86_ASM)
|
||||
if (iCpu & WELS_CPU_SSE2) {
|
||||
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_sse2;
|
||||
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_sse2;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void SetNonZeroCount_c (int8_t* pNonZeroCount) {
|
||||
|
@ -113,3 +113,30 @@ WELS_EXTERN IdctResAddPred_mmx
|
||||
|
||||
emms
|
||||
ret
|
||||
|
||||
;***********************************************************************
; void WelsBlockZero16x16_sse2(int16_t * block, int32_t stride);
;
; Writes zeros over 16 rows of 32 bytes (16 int16_t values per row).
;   r0 = block  : address of the first row; advanced by one row per pass
;   r1 = stride : row pitch counted in 2-byte (int16_t) units; doubled
;                 below to obtain the byte step between rows
; movdqa is an aligned store, so every row address (block and the byte
; pitch) has to be 16-byte aligned or the store faults.
; Registers written: r0, r1, xmm0, flags.
; NOTE(review): LOAD_2_PARA and SIGN_EXTENSION are macros defined outside
; this view; presumably they load the two arguments into r0/r1 and widen
; the 32-bit stride to the full register - confirm against the macro file.
;***********************************************************************
WELS_EXTERN WelsBlockZero16x16_sse2
    %assign push_num 0
    LOAD_2_PARA
    pxor    xmm0, xmm0                  ; xmm0 = 0, the value stored to every row
    SIGN_EXTENSION r1, r1d
    add     r1, r1                      ; r1 = row pitch in bytes
%rep 16
    movdqa  [r0], xmm0                  ; first 16 bytes of the row
    movdqa  [r0+16], xmm0               ; second 16 bytes of the row
    add     r0, r1                      ; step to the next row
%endrep
    ret
|
||||
|
||||
;***********************************************************************
; void WelsBlockZero8x8_sse2(int16_t * block, int32_t stride);
;
; Writes zeros over 8 rows of 16 bytes (8 int16_t values per row).
;   r0 = block  : address of the first row; advanced by one row per pass
;   r1 = stride : row pitch counted in 2-byte (int16_t) units; doubled
;                 below to obtain the byte step between rows
; movdqa is an aligned store, so every row address (block and the byte
; pitch) has to be 16-byte aligned or the store faults.
; Registers written: r0, r1, xmm0, flags.
; NOTE(review): LOAD_2_PARA and SIGN_EXTENSION are macros defined outside
; this view; presumably they load the two arguments into r0/r1 and widen
; the 32-bit stride to the full register - confirm against the macro file.
;***********************************************************************
WELS_EXTERN WelsBlockZero8x8_sse2
    %assign push_num 0
    LOAD_2_PARA
    pxor    xmm0, xmm0                  ; xmm0 = 0, the value stored to every row
    SIGN_EXTENSION r1, r1d
    add     r1, r1                      ; r1 = row pitch in bytes
%rep 8
    movdqa  [r0], xmm0                  ; clear the 16-byte row
    add     r0, r1                      ; step to the next row
%endrep
    ret
|
||||
|
Loading…
x
Reference in New Issue
Block a user