reduce blockzero function complexity and remove useless functions

This commit is contained in:
wayne liu 2014-03-19 19:31:34 -07:00
parent c247c5a05d
commit 06c534d9f2
7 changed files with 1 additions and 280 deletions

View File

@ -348,46 +348,6 @@
Name="asm"
Filter="*.asm;*.inc"
>
<File
RelativePath="..\..\..\decoder\core\x86\block_add.asm"
>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\..\common\x86\cpuid.asm"
>

View File

@ -116,46 +116,6 @@ WELS_ASM_FUNC_BEGIN SetNonZeroCount_neon
WELS_ASM_FUNC_END
// Zero a 16-row block of int16_t coefficients, 16 elements (32 bytes) per row.
// r0 int16_t * block  (advanced by one row pitch after every store)
// r1 int32_t stride   (row pitch in int16_t elements; doubled below to get bytes)
WELS_ASM_FUNC_BEGIN WelsResBlockZero16x16_neon// can use for 256*sizeof(int16_t)
    push        {r2}                        // r2 is used as the row counter; restored before leaving

    // q0:q1 together hold the 32 zero bytes written for each row
    vmov.i16    q0, #0
    vmov.i16    q1, #0

    lsl         r1, r1, #1                  // stride in elements -> stride in bytes
    mov         r2, #16                     // rows still to clear

zero_rows_16x16_pair:
    // two rows per iteration; each store post-increments r0 by the byte stride
    vst1.i16    {q0, q1}, [r0], r1
    vst1.i16    {q0, q1}, [r0], r1
    subs        r2, r2, #2
    bne         zero_rows_16x16_pair

    pop         {r2}
WELS_ASM_FUNC_END
// Zero an 8-row block of int16_t coefficients, 8 elements (16 bytes) per row.
// r0 int16_t * block  (advanced by one row pitch after every store)
// r1 int32_t stride   (row pitch in int16_t elements; doubled below to get bytes)
WELS_ASM_FUNC_BEGIN WelsResBlockZero8x8_neon// can use for 64*sizeof(int16_t)
    push        {r2}                        // r2 is used as the row counter; restored before leaving

    vmov.i16    q0, #0                      // q0 holds the 16 zero bytes written for each row

    lsl         r1, r1, #1                  // stride in elements -> stride in bytes
    mov         r2, #8                      // rows still to clear

zero_rows_8x8_pair:
    // two rows per iteration; each store post-increments r0 by the byte stride
    vst1.i16    {q0}, [r0], r1
    vst1.i16    {q0}, [r0], r1
    subs        r2, r2, #2
    bne         zero_rows_8x8_pair

    pop         {r2}
WELS_ASM_FUNC_END
// uint8_t *pred, const int32_t stride, int16_t *rs
WELS_ASM_FUNC_BEGIN IdctResAddPred_neon

View File

@ -37,8 +37,6 @@
namespace WelsDec {
void WelsBlockInit (int16_t* pBlock, int32_t iWidth, int32_t iHeight, int32_t iStride, uint8_t uiVal);
int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx);
int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur);
@ -66,22 +64,13 @@ extern "C" {
#endif//__cplusplus
#if defined(HAVE_NEON)
void WelsResBlockZero16x16_neon(int16_t* pBlock, int32_t iStride);
void WelsResBlockZero8x8_neon(int16_t* pBlock, int32_t iStride);
void SetNonZeroCount_neon(int16_t* pBlock, int8_t* pNonZeroCount);
#endif
#ifdef X86_ASM
void WelsResBlockZero16x16_sse2 (int16_t* pBlock, int32_t iStride);
void WelsResBlockZero8x8_sse2 (int16_t* pBlock, int32_t iStride);
#endif
#ifdef __cplusplus
}
#endif//__cplusplus
void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride);
void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride);
void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount);
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu);

View File

@ -133,15 +133,9 @@ typedef struct TagDeblockingFunc {
PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Hor;
} SDeblockingFunc, *PDeblockingFunc;
typedef void (*PWelsBlockAddStrideFunc) (uint8_t* pDest, uint8_t* pPred, int16_t* pRes, int32_t iPredStride,
int32_t iResStride);
typedef void (*PWelsBlockZeroFunc) (int16_t* pBlock, int32_t iStride);
typedef void (*PWelsNonZeroCountFunc) (int16_t* pBlock, int8_t* pNonZeroCount);
typedef void (*PWelsSimpleIdct4x4AddFunc) (int16_t* pDest, int16_t* pSrc, int32_t iStride);
// Table of block-level function pointers; WelsBlockFuncInit() fills every entry.
typedef struct TagBlockFunc {
// Zeroing routine used for a 16x16 block of int16_t (pBlock, iStride).
PWelsBlockZeroFunc pWelsBlockZero16x16Func;
// Zeroing routine used for an 8x8 block of int16_t (pBlock, iStride).
PWelsBlockZeroFunc pWelsBlockZero8x8Func;
// Routine taking a coefficient block and a non-zero-count output array.
PWelsNonZeroCountFunc pWelsSetNonZeroCountFunc;
} SBlockFunc;

View File

@ -842,9 +842,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
}
pCtx->sBlockFunc.pWelsBlockZero16x16Func (pCurLayer->pScaledTCoeff[iMbXy], 16);
pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256, 8);
pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256 + 64, 8);
memset(pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof(int16_t));
ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
@ -1043,43 +1041,15 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur) {
return 0;
}
/*!
 * \brief   Fill a rectangular region of an int16_t buffer, one row at a time.
 *
 * \param   pBlock   first element of the first row
 * \param   iWidth   number of int16_t elements filled in each row
 * \param   iHeight  number of rows; nothing is written when it is <= 0
 * \param   iStride  distance between consecutive rows, in int16_t elements
 * \param   uiVal    value handed to memset(), i.e. written into every BYTE of the
 *                   row; a non-zero uiVal therefore yields elements equal to
 *                   uiVal * 0x0101 rather than uiVal
 */
void WelsBlockInit (int16_t* pBlock, int32_t iWidth, int32_t iHeight, int32_t iStride, uint8_t uiVal) {
  const size_t kuiRowBytes = iWidth * sizeof (int16_t);
  int16_t* pRow = pBlock;

  for (int32_t iRowsLeft = iHeight; iRowsLeft > 0; --iRowsLeft, pRow += iStride) {
    memset (pRow, uiVal, kuiRowBytes);
  }
}
/*!
 * \brief   Populate the block function table for the current CPU.
 *
 * The C implementations are installed first; entries are then overridden with
 * SSE2 or NEON versions when the matching build macro is defined and the
 * corresponding flag is set in iCpu.
 *
 * \param   pFunc  table to fill
 * \param   iCpu   CPU feature bit mask (tested against WELS_CPU_SSE2 / WELS_CPU_NEON)
 */
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
  // Baseline: portable C versions.
  pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_c;
  pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_c;
  pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_c;

#ifdef X86_ASM
  const bool kbHasSse2 = (iCpu & WELS_CPU_SSE2) != 0;
  if (kbHasSse2) {
    pFunc->pWelsBlockZero8x8Func   = WelsResBlockZero8x8_sse2;
    pFunc->pWelsBlockZero16x16Func = WelsResBlockZero16x16_sse2;
  }
#endif

#ifdef HAVE_NEON
  const bool kbHasNeon = (iCpu & WELS_CPU_NEON) != 0;
  if (kbHasNeon) {
    pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_neon;
    pFunc->pWelsBlockZero8x8Func    = WelsResBlockZero8x8_neon;
    pFunc->pWelsBlockZero16x16Func  = WelsResBlockZero16x16_neon;
  }
#endif
}
/*!
 * \brief   C version of the 16x16 block-zero routine: clears 16 rows of 16
 *          int16_t elements via WelsBlockInit().
 *
 * \param   pBlock   first element of the block
 * \param   iStride  distance between rows, in int16_t elements
 */
void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
  const int32_t kiBlockEdge = 16;   // block is square: same width and height
  const uint8_t kuiFill     = 0;
  WelsBlockInit (pBlock, kiBlockEdge, kiBlockEdge, iStride, kuiFill);
}
/*!
 * \brief   C version of the 8x8 block-zero routine: clears 8 rows of 8
 *          int16_t elements via WelsBlockInit().
 *
 * \param   pBlock   first element of the block
 * \param   iStride  distance between rows, in int16_t elements
 */
void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
  const int32_t kiBlockEdge = 8;    // block is square: same width and height
  const uint8_t kuiFill     = 0;
  WelsBlockInit (pBlock, kiBlockEdge, kiBlockEdge, iStride, kuiFill);
}
void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount) {
int32_t i;

View File

@ -1,151 +0,0 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* block_add.asm
;*
;* Abstract
;* add block
;*
;* History
;* 09/21/2009 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
;*******************************************************************************
; Code
;*******************************************************************************
SECTION .text
;*******************************************************************************
;   void WelsResBlockZero16x16_sse2(int16_t* pBlock, int32_t iStride)
;
;   Stores zeros to 16 rows of 32 bytes (16 int16_t per row). Rows are iStride
;   int16_t elements apart; the stride is doubled below to obtain a byte pitch.
;   Every store is movdqa, so each row address must be 16-byte aligned.
;
;   NOTE(review): r0/r1/r2 and LOAD_2_PARA / PUSH_XMM / SIGN_EXTENSION / POP_XMM
;   are not defined in this file (presumably asm_inc.asm); r0 = pBlock and
;   r1 = iStride is assumed from the prototype above -- confirm.
;*******************************************************************************
WELS_EXTERN WelsResBlockZero16x16_sse2
    %assign push_num 0
    LOAD_2_PARA
    PUSH_XMM 8
    SIGN_EXTENSION  r1, r1d

    lea     r1, [r1*2]                  ; r1 = row pitch in bytes
    lea     r2, [r1*3]                  ; r2 = 3 * row pitch (address of the 4th row of a group)
    pxor    xmm7, xmm7                  ; xmm7 = 16 zero bytes

    ; 16 rows are cleared as 4 groups of 4 rows; r0 is moved forward by
    ; 4 rows before every group except the first.
    %assign zero16_group 0
    %rep 4
        %if zero16_group > 0
    lea     r0, [r0+r1*4]
        %endif
    movdqa  [r0],           xmm7        ; row 0: low / high 16 bytes
    movdqa  [r0+10h],       xmm7
    movdqa  [r0+r1],        xmm7        ; row 1
    movdqa  [r0+r1+10h],    xmm7
    movdqa  [r0+r1*2],      xmm7        ; row 2
    movdqa  [r0+r1*2+10h],  xmm7
    movdqa  [r0+r2],        xmm7        ; row 3
    movdqa  [r0+r2+10h],    xmm7
        %assign zero16_group zero16_group+1
    %endrep

    POP_XMM
    ret
;*******************************************************************************
;   void WelsResBlockZero8x8_sse2(int16_t* pBlock, int32_t iStride)
;
;   Stores zeros to 8 rows of 16 bytes (8 int16_t per row). Rows are iStride
;   int16_t elements apart; the stride is doubled below to obtain a byte pitch.
;   Every store is movdqa, so each row address must be 16-byte aligned.
;
;   NOTE(review): r0/r1/r2 and LOAD_2_PARA / PUSH_XMM / SIGN_EXTENSION / POP_XMM
;   are not defined in this file (presumably asm_inc.asm); r0 = pBlock and
;   r1 = iStride is assumed from the prototype above -- confirm.
;*******************************************************************************
WELS_EXTERN WelsResBlockZero8x8_sse2
    %assign push_num 0
    LOAD_2_PARA
    PUSH_XMM 8
    SIGN_EXTENSION  r1, r1d

    lea     r1, [r1*2]                  ; r1 = row pitch in bytes
    lea     r2, [r1*3]                  ; r2 = 3 * row pitch (address of the 4th row of a group)
    pxor    xmm7, xmm7                  ; xmm7 = 16 zero bytes

    ; 8 rows are cleared as 2 groups of 4 rows; r0 is moved forward by
    ; 4 rows before the second group.
    %assign zero8_group 0
    %rep 2
        %if zero8_group > 0
    lea     r0, [r0+r1*4]
        %endif
    movdqa  [r0],       xmm7            ; row 0
    movdqa  [r0+r1],    xmm7            ; row 1
    movdqa  [r0+r1*2],  xmm7            ; row 2
    movdqa  [r0+r2],    xmm7            ; row 3
        %assign zero8_group zero8_group+1
    %endrep

    POP_XMM
    ret

View File

@ -27,7 +27,6 @@ DECODER_OBJS += $(DECODER_CPP_SRCS:.cpp=.$(OBJ))
ifeq ($(ASM_ARCH), x86)
DECODER_ASM_SRCS=\
$(DECODER_SRCDIR)/core/x86/block_add.asm\
$(DECODER_SRCDIR)/core/x86/dct.asm\
$(DECODER_SRCDIR)/core/x86/intra_pred.asm\