rephrase blockzero function complexity and remove useless functions
This commit is contained in:
parent
c247c5a05d
commit
06c534d9f2
@ -348,46 +348,6 @@
|
||||
Name="asm"
|
||||
Filter="*.asm;*.inc"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\..\decoder\core\x86\block_add.asm"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Release|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\common\x86\cpuid.asm"
|
||||
>
|
||||
|
@ -116,46 +116,6 @@ WELS_ASM_FUNC_BEGIN SetNonZeroCount_neon
|
||||
WELS_ASM_FUNC_END
|
||||
|
||||
|
||||
// r0 int16_t * block,
|
||||
// r1 int32_t stride
|
||||
// Zero a 16x16 block of int16_t coefficients, one 32-byte row at a time.
//   r0: int16_t* block  - address of the first row; advanced past each row as it is written
//   r1: int32_t  stride - row pitch in int16_t elements (doubled below to get bytes)
// q0 and q1 are overwritten with zeros; r2 is saved and restored around the loop.
WELS_ASM_FUNC_BEGIN WelsResBlockZero16x16_neon// can use for 256*sizeof(int16_t)
|
||||
push {r2} // save r2, which serves as the row counter below
|
||||
mov r2, #16 // r2 = number of rows still to clear
|
||||
// each row 16 elements, 16*sizeof(int16_t)
|
||||
// memset(ptr_dest, 0, 16*sizeof(int16_t));
|
||||
// ptr_dest += stride;
|
||||
lsl r1, r1, #1 // r1 = 2*r1, i.e. stride converted from int16_t elements to bytes
|
||||
veor.i16 q0, q0, q0 // q0 = 0 (x ^ x)
|
||||
|
||||
veor.i16 q1, q1, q1 // q1 = 0, so {q0, q1} is 32 bytes of zeros = one 16-element row
|
||||
|
||||
// Loop body is unrolled twice: two rows are stored per iteration and r2 drops by 2.
block_zero_16x16_luma_loop:
|
||||
vst1.i16 {q0, q1}, [r0], r1 // store one zero row, then r0 += r1 (next row)
|
||||
subs r2, r2, #2 // two rows handled per iteration; sets the flags tested by bne
|
||||
vst1.i16 {q0, q1}, [r0], r1 // second row of this iteration (vst1 leaves the flags alone)
|
||||
bne block_zero_16x16_luma_loop // repeat until r2 reaches zero
|
||||
|
||||
pop {r2} // restore the caller's r2
|
||||
WELS_ASM_FUNC_END
|
||||
|
||||
// Zero an 8x8 block of int16_t coefficients, one 16-byte row at a time.
//   r0: int16_t* block  - address of the first row; advanced past each row as it is written
//   r1: int32_t  stride - row pitch in int16_t elements (doubled below to get bytes)
// q0 is overwritten with zeros; r2 is saved and restored around the loop.
WELS_ASM_FUNC_BEGIN WelsResBlockZero8x8_neon// can use for 64*sizeof(int16_t)
|
||||
push {r2} // save r2, which serves as the row counter below
|
||||
mov r2, #8 // r2 = number of rows still to clear
|
||||
// each row 8 elements, 8*sizeof(int16_t)
|
||||
// memset(ptr_dest, 0, 8*sizeof(int16_t));
|
||||
// ptr_dest += stride;
|
||||
lsl r1, r1, #1 // r1 = 2*r1, i.e. stride converted from int16_t elements to bytes
|
||||
veor.i16 q0, q0, q0 // q0 = 0 (x ^ x): 16 bytes of zeros = one 8-element row
|
||||
|
||||
// Loop body is unrolled twice: two rows are stored per iteration and r2 drops by 2.
block_zero_8x8_chma_loop:
|
||||
vst1.i16 {q0}, [r0], r1 // store one zero row, then r0 += r1 (next row)
|
||||
subs r2, r2, #2 // two rows handled per iteration; sets the flags tested by bne
|
||||
vst1.i16 {q0}, [r0], r1 // second row of this iteration (vst1 leaves the flags alone)
|
||||
bne block_zero_8x8_chma_loop // repeat until r2 reaches zero
|
||||
|
||||
pop {r2} // restore the caller's r2
|
||||
WELS_ASM_FUNC_END
|
||||
|
||||
|
||||
// uint8_t *pred, const int32_t stride, int16_t *rs
|
||||
WELS_ASM_FUNC_BEGIN IdctResAddPred_neon
|
||||
|
||||
|
@ -37,8 +37,6 @@
|
||||
|
||||
namespace WelsDec {
|
||||
|
||||
void WelsBlockInit (int16_t* pBlock, int32_t iWidth, int32_t iHeight, int32_t iStride, uint8_t uiVal);
|
||||
|
||||
int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx);
|
||||
int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur);
|
||||
|
||||
@ -66,22 +64,13 @@ extern "C" {
|
||||
#endif//__cplusplus
|
||||
|
||||
#if defined(HAVE_NEON)
|
||||
void WelsResBlockZero16x16_neon(int16_t* pBlock, int32_t iStride);
|
||||
void WelsResBlockZero8x8_neon(int16_t* pBlock, int32_t iStride);
|
||||
void SetNonZeroCount_neon(int16_t* pBlock, int8_t* pNonZeroCount);
|
||||
#endif
|
||||
|
||||
#ifdef X86_ASM
|
||||
void WelsResBlockZero16x16_sse2 (int16_t* pBlock, int32_t iStride);
|
||||
void WelsResBlockZero8x8_sse2 (int16_t* pBlock, int32_t iStride);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif//__cplusplus
|
||||
|
||||
void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride);
|
||||
void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride);
|
||||
void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount);
|
||||
|
||||
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu);
|
||||
|
@ -133,15 +133,9 @@ typedef struct TagDeblockingFunc {
|
||||
PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Hor;
|
||||
} SDeblockingFunc, *PDeblockingFunc;
|
||||
|
||||
typedef void (*PWelsBlockAddStrideFunc) (uint8_t* pDest, uint8_t* pPred, int16_t* pRes, int32_t iPredStride,
|
||||
int32_t iResStride);
|
||||
typedef void (*PWelsBlockZeroFunc) (int16_t* pBlock, int32_t iStride);
|
||||
typedef void (*PWelsNonZeroCountFunc) (int16_t* pBlock, int8_t* pNonZeroCount);
|
||||
typedef void (*PWelsSimpleIdct4x4AddFunc) (int16_t* pDest, int16_t* pSrc, int32_t iStride);
|
||||
|
||||
// Table of block-level helper routines stored as function pointers.
// WelsBlockFuncInit() fills it with the C versions first and then overrides
// entries with SSE2 / NEON versions when the CPU flags allow it.
typedef struct TagBlockFunc {
|
||||
PWelsBlockZeroFunc pWelsBlockZero16x16Func; // zeroes a 16x16 int16_t block; called as (pBlock, iStride)
|
||||
PWelsBlockZeroFunc pWelsBlockZero8x8Func; // zeroes an 8x8 int16_t block; called as (pBlock, iStride)
|
||||
PWelsNonZeroCountFunc pWelsSetNonZeroCountFunc; // called as (pBlock, pNonZeroCount); see SetNonZeroCount_c
|
||||
} SBlockFunc;
|
||||
|
||||
|
@ -842,9 +842,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
|
||||
uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
|
||||
}
|
||||
|
||||
pCtx->sBlockFunc.pWelsBlockZero16x16Func (pCurLayer->pScaledTCoeff[iMbXy], 16);
|
||||
pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256, 8);
|
||||
pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256 + 64, 8);
|
||||
memset(pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof(int16_t));
|
||||
|
||||
ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
|
||||
ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
|
||||
@ -1043,43 +1041,15 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*!
 * \brief   Fill an iWidth x iHeight region of an int16_t block, row by row.
 *
 * \param   pBlock   first element of the top row; a NULL pointer is ignored
 * \param   iWidth   number of int16_t elements to fill in every row
 * \param   iHeight  number of rows to fill
 * \param   iStride  distance between the starts of consecutive rows, in int16_t elements
 * \param   uiVal    byte value written by memset() into BOTH bytes of every element,
 *                   so an element ends up as (uiVal << 8) | uiVal; only 0 yields the
 *                   same value as an element
 *
 * \note    Non-positive iWidth or iHeight makes the call a no-op. A negative width
 *          would otherwise be converted to an enormous size_t length for memset().
 */
void WelsBlockInit (int16_t* pBlock, int32_t iWidth, int32_t iHeight, int32_t iStride, uint8_t uiVal) {
  if (pBlock == NULL || iWidth <= 0 || iHeight <= 0)
    return;

  // The row length is loop invariant; compute it once as an unsigned byte count.
  const size_t kuiRowBytes = (size_t)iWidth * sizeof (int16_t);

  for (int32_t iRow = 0; iRow < iHeight; iRow++) {
    // Index from the base pointer so no pointer beyond the last row is ever formed.
    memset (pBlock + iRow * iStride, uiVal, kuiRowBytes);
  }
}
|
||||
|
||||
/*!
 * \brief   Populate the block function table for the detected CPU.
 *
 * \param   pFunc  table to fill in
 * \param   iCpu   bit mask of WELS_CPU_* capability flags
 *
 * The plain C routines are installed first, then replaced by SSE2 / NEON
 * versions when the matching support is compiled in and reported by iCpu.
 */
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
  // Portable defaults; always valid.
  pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_c;
  pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_c;
  pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_c;

#ifdef X86_ASM
  // SSE2 only provides the two block-zero routines.
  if ((iCpu & WELS_CPU_SSE2) != 0) {
    pFunc->pWelsBlockZero8x8Func   = WelsResBlockZero8x8_sse2;
    pFunc->pWelsBlockZero16x16Func = WelsResBlockZero16x16_sse2;
  }
#endif

#ifdef HAVE_NEON
  // NEON replaces all three entries.
  if ((iCpu & WELS_CPU_NEON) != 0) {
    pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_neon;
    pFunc->pWelsBlockZero8x8Func    = WelsResBlockZero8x8_neon;
    pFunc->pWelsBlockZero16x16Func  = WelsResBlockZero16x16_neon;
  }
#endif
}
|
||||
/*!
 * \brief   C fallback that zeroes a 16x16 block of int16_t values.
 *
 * \param   pBlock   first element of the top row
 * \param   iStride  row pitch in int16_t elements
 */
void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
  const int32_t kiBlockSize = 16;   // the block is square: 16 columns by 16 rows

  WelsBlockInit (pBlock, kiBlockSize, kiBlockSize, iStride, 0);
}
|
||||
|
||||
/*!
 * \brief   C fallback that zeroes an 8x8 block of int16_t values.
 *
 * \param   pBlock   first element of the top row
 * \param   iStride  row pitch in int16_t elements
 */
void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
  const int32_t kiBlockSize = 8;   // the block is square: 8 columns by 8 rows

  WelsBlockInit (pBlock, kiBlockSize, kiBlockSize, iStride, 0);
}
|
||||
|
||||
void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount) {
|
||||
int32_t i;
|
||||
|
@ -1,151 +0,0 @@
|
||||
;*!
|
||||
;* \copy
|
||||
;* Copyright (c) 2009-2013, Cisco Systems
|
||||
;* All rights reserved.
|
||||
;*
|
||||
;* Redistribution and use in source and binary forms, with or without
|
||||
;* modification, are permitted provided that the following conditions
|
||||
;* are met:
|
||||
;*
|
||||
;* * Redistributions of source code must retain the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer.
|
||||
;*
|
||||
;* * Redistributions in binary form must reproduce the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer in
|
||||
;* the documentation and/or other materials provided with the
|
||||
;* distribution.
|
||||
;*
|
||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
;* POSSIBILITY OF SUCH DAMAGE.
|
||||
;*
|
||||
;*
|
||||
;* block_add.asm
|
||||
;*
|
||||
;* Abstract
|
||||
;* add block
|
||||
;*
|
||||
;* History
|
||||
;* 09/21/2009 Created
|
||||
;*
|
||||
;*
|
||||
;*************************************************************************/
|
||||
|
||||
%include "asm_inc.asm"
|
||||
|
||||
;*******************************************************************************
|
||||
; Code
|
||||
;*******************************************************************************
|
||||
|
||||
SECTION .text
|
||||
|
||||
|
||||
;*******************************************************************************
|
||||
; void WelsResBlockZero16x16_sse2(int16_t* pBlock,int32_t iStride)
|
||||
;*******************************************************************************
|
||||
; Zero a 16x16 block of int16_t values: 16 rows, two 16-byte stores per row.
; After the prologue r0 holds pBlock and r1 holds iStride (in int16_t elements).
; Every store is movdqa, which faults on addresses that are not 16-byte aligned,
; so pBlock and the byte stride (2*iStride) must keep each row 16-byte aligned.
; NOTE(review): LOAD_2_PARA, PUSH_XMM, POP_XMM and SIGN_EXTENSION come from
; asm_inc.asm and are not visible here - presumably they load the two arguments,
; save/restore callee-saved xmm registers where the ABI requires it, and widen
; the 32-bit stride to pointer width; confirm against asm_inc.asm.
; NOTE(review): r2 is used as scratch although only two parameters are loaded -
; confirm it is a volatile register on every supported ABI.
WELS_EXTERN WelsResBlockZero16x16_sse2
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
PUSH_XMM 8
|
||||
SIGN_EXTENSION r1, r1d
|
||||
lea r1, [r1*2] ; r1 = stride in bytes (2 bytes per int16_t)
|
||||
lea r2, [r1*3] ; r2 = byte offset of the fourth row of each group (3 * row pitch)
|
||||
|
||||
pxor xmm7, xmm7 ; xmm7 = 0, the value stored everywhere below
|
||||
|
||||
; rows 0-3: low 16 bytes at +0 and high 16 bytes at +10h of each row
|
||||
movdqa [r0], xmm7
|
||||
movdqa [r0+10h], xmm7
|
||||
|
||||
movdqa [r0+r1], xmm7
|
||||
movdqa [r0+r1+10h], xmm7
|
||||
|
||||
movdqa [r0+r1*2], xmm7
|
||||
movdqa [r0+r1*2+10h], xmm7
|
||||
|
||||
movdqa [r0+r2], xmm7
|
||||
movdqa [r0+r2+10h], xmm7
|
||||
|
||||
; rows 4-7: advance r0 by four rows, then repeat the same store pattern
|
||||
lea r0, [r0+r1*4]
|
||||
movdqa [r0], xmm7
|
||||
movdqa [r0+10h], xmm7
|
||||
|
||||
movdqa [r0+r1], xmm7
|
||||
movdqa [r0+r1+10h], xmm7
|
||||
|
||||
movdqa [r0+r1*2], xmm7
|
||||
movdqa [r0+r1*2+10h], xmm7
|
||||
|
||||
movdqa [r0+r2], xmm7
|
||||
movdqa [r0+r2+10h], xmm7
|
||||
|
||||
; rows 8-11
|
||||
lea r0, [r0+r1*4]
|
||||
movdqa [r0], xmm7
|
||||
movdqa [r0+10h], xmm7
|
||||
|
||||
movdqa [r0+r1], xmm7
|
||||
movdqa [r0+r1+10h], xmm7
|
||||
|
||||
movdqa [r0+r1*2], xmm7
|
||||
movdqa [r0+r1*2+10h], xmm7
|
||||
|
||||
movdqa [r0+r2], xmm7
|
||||
movdqa [r0+r2+10h], xmm7
|
||||
|
||||
; rows 12-15
|
||||
lea r0, [r0+r1*4]
|
||||
movdqa [r0], xmm7
|
||||
movdqa [r0+10h], xmm7
|
||||
|
||||
movdqa [r0+r1], xmm7
|
||||
movdqa [r0+r1+10h], xmm7
|
||||
|
||||
movdqa [r0+r1*2], xmm7
|
||||
movdqa [r0+r1*2+10h], xmm7
|
||||
|
||||
movdqa [r0+r2], xmm7
|
||||
movdqa [r0+r2+10h], xmm7
|
||||
|
||||
POP_XMM
|
||||
ret
|
||||
|
||||
|
||||
;*******************************************************************************
|
||||
; void WelsResBlockZero8x8_sse2(int16_t * pBlock, int32_t iStride)
|
||||
;*******************************************************************************
|
||||
; Zero an 8x8 block of int16_t values: 8 rows, one 16-byte store per row.
; After the prologue r0 holds pBlock and r1 holds iStride (in int16_t elements).
; Every store is movdqa, which faults on addresses that are not 16-byte aligned,
; so pBlock and the byte stride (2*iStride) must keep each row 16-byte aligned.
; NOTE(review): LOAD_2_PARA, PUSH_XMM, POP_XMM and SIGN_EXTENSION come from
; asm_inc.asm and are not visible here - presumably they load the two arguments,
; save/restore callee-saved xmm registers where the ABI requires it, and widen
; the 32-bit stride to pointer width; confirm against asm_inc.asm.
; NOTE(review): r2 is used as scratch although only two parameters are loaded -
; confirm it is a volatile register on every supported ABI.
WELS_EXTERN WelsResBlockZero8x8_sse2
|
||||
%assign push_num 0
|
||||
LOAD_2_PARA
|
||||
PUSH_XMM 8
|
||||
SIGN_EXTENSION r1, r1d
|
||||
lea r1, [r1*2] ; r1 = stride in bytes (2 bytes per int16_t)
|
||||
lea r2, [r1*3] ; r2 = byte offset of the fourth row of each group (3 * row pitch)
|
||||
|
||||
pxor xmm7, xmm7 ; xmm7 = 0, the value stored everywhere below
|
||||
|
||||
; rows 0-3
movdqa [r0], xmm7
|
||||
movdqa [r0+r1], xmm7
|
||||
movdqa [r0+r1*2], xmm7
|
||||
movdqa [r0+r2], xmm7
|
||||
|
||||
; rows 4-7: advance r0 by four rows, then repeat the same store pattern
lea r0, [r0+r1*4]
|
||||
movdqa [r0], xmm7
|
||||
movdqa [r0+r1], xmm7
|
||||
movdqa [r0+r1*2], xmm7
|
||||
movdqa [r0+r2], xmm7
|
||||
|
||||
|
||||
POP_XMM
|
||||
ret
|
||||
|
@ -27,7 +27,6 @@ DECODER_OBJS += $(DECODER_CPP_SRCS:.cpp=.$(OBJ))
|
||||
|
||||
ifeq ($(ASM_ARCH), x86)
|
||||
DECODER_ASM_SRCS=\
|
||||
$(DECODER_SRCDIR)/core/x86/block_add.asm\
|
||||
$(DECODER_SRCDIR)/core/x86/dct.asm\
|
||||
$(DECODER_SRCDIR)/core/x86/intra_pred.asm\
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user