rephrase blockzero function complexity and remove useless functions
This commit is contained in:
@@ -348,46 +348,6 @@
|
|||||||
Name="asm"
|
Name="asm"
|
||||||
Filter="*.asm;*.inc"
|
Filter="*.asm;*.inc"
|
||||||
>
|
>
|
||||||
<File
|
|
||||||
RelativePath="..\..\..\decoder\core\x86\block_add.asm"
|
|
||||||
>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Release|Win32"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Release|x64"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|Win32"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|x64"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
</File>
|
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\common\x86\cpuid.asm"
|
RelativePath="..\..\..\common\x86\cpuid.asm"
|
||||||
>
|
>
|
||||||
|
|||||||
@@ -116,46 +116,6 @@ WELS_ASM_FUNC_BEGIN SetNonZeroCount_neon
|
|||||||
WELS_ASM_FUNC_END
|
WELS_ASM_FUNC_END
|
||||||
|
|
||||||
|
|
||||||
// r0 int16_t * block,
|
|
||||||
// r1 int32_t stride
|
|
||||||
WELS_ASM_FUNC_BEGIN WelsResBlockZero16x16_neon// can use for 256*sizeof(int16_t)
|
|
||||||
push {r2}
|
|
||||||
mov r2, #16
|
|
||||||
// each row 16 elements, 16*sizeof(int16_t)
|
|
||||||
// memset(ptr_dest, 0, 16*sizeof(int16_t));
|
|
||||||
// ptr_dest += stride;
|
|
||||||
lsl r1, r1, #1 // r1 = 2*r1
|
|
||||||
veor.i16 q0, q0, q0
|
|
||||||
veor.i16 q1, q1, q1
|
|
||||||
|
|
||||||
block_zero_16x16_luma_loop:
|
|
||||||
vst1.i16 {q0, q1}, [r0], r1
|
|
||||||
subs r2, r2, #2
|
|
||||||
vst1.i16 {q0, q1}, [r0], r1
|
|
||||||
bne block_zero_16x16_luma_loop
|
|
||||||
|
|
||||||
pop {r2}
|
|
||||||
WELS_ASM_FUNC_END
|
|
||||||
|
|
||||||
WELS_ASM_FUNC_BEGIN WelsResBlockZero8x8_neon// can use for 64*sizeof(int16_t)
|
|
||||||
push {r2}
|
|
||||||
mov r2, #8
|
|
||||||
// each row 8 elements, 8*sizeof(int16_t)
|
|
||||||
// memset(ptr_dest, 0, 8*sizeof(int16_t));
|
|
||||||
// ptr_dest += stride;
|
|
||||||
lsl r1, r1, #1
|
|
||||||
veor.i16 q0, q0, q0
|
|
||||||
|
|
||||||
block_zero_8x8_chma_loop:
|
|
||||||
vst1.i16 {q0}, [r0], r1
|
|
||||||
subs r2, r2, #2
|
|
||||||
vst1.i16 {q0}, [r0], r1
|
|
||||||
bne block_zero_8x8_chma_loop
|
|
||||||
|
|
||||||
pop {r2}
|
|
||||||
WELS_ASM_FUNC_END
|
|
||||||
|
|
||||||
|
|
||||||
// uint8_t *pred, const int32_t stride, int16_t *rs
|
// uint8_t *pred, const int32_t stride, int16_t *rs
|
||||||
WELS_ASM_FUNC_BEGIN IdctResAddPred_neon
|
WELS_ASM_FUNC_BEGIN IdctResAddPred_neon
|
||||||
|
|
||||||
|
|||||||
@@ -37,8 +37,6 @@
|
|||||||
|
|
||||||
namespace WelsDec {
|
namespace WelsDec {
|
||||||
|
|
||||||
void WelsBlockInit (int16_t* pBlock, int32_t iWidth, int32_t iHeight, int32_t iStride, uint8_t uiVal);
|
|
||||||
|
|
||||||
int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx);
|
int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx);
|
||||||
int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur);
|
int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur);
|
||||||
|
|
||||||
@@ -66,22 +64,13 @@ extern "C" {
|
|||||||
#endif//__cplusplus
|
#endif//__cplusplus
|
||||||
|
|
||||||
#if defined(HAVE_NEON)
|
#if defined(HAVE_NEON)
|
||||||
void WelsResBlockZero16x16_neon(int16_t* pBlock, int32_t iStride);
|
|
||||||
void WelsResBlockZero8x8_neon(int16_t* pBlock, int32_t iStride);
|
|
||||||
void SetNonZeroCount_neon(int16_t* pBlock, int8_t* pNonZeroCount);
|
void SetNonZeroCount_neon(int16_t* pBlock, int8_t* pNonZeroCount);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef X86_ASM
|
|
||||||
void WelsResBlockZero16x16_sse2 (int16_t* pBlock, int32_t iStride);
|
|
||||||
void WelsResBlockZero8x8_sse2 (int16_t* pBlock, int32_t iStride);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif//__cplusplus
|
#endif//__cplusplus
|
||||||
|
|
||||||
void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride);
|
|
||||||
void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride);
|
|
||||||
void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount);
|
void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount);
|
||||||
|
|
||||||
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu);
|
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu);
|
||||||
|
|||||||
@@ -133,15 +133,9 @@ typedef struct TagDeblockingFunc {
|
|||||||
PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Hor;
|
PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Hor;
|
||||||
} SDeblockingFunc, *PDeblockingFunc;
|
} SDeblockingFunc, *PDeblockingFunc;
|
||||||
|
|
||||||
typedef void (*PWelsBlockAddStrideFunc) (uint8_t* pDest, uint8_t* pPred, int16_t* pRes, int32_t iPredStride,
|
|
||||||
int32_t iResStride);
|
|
||||||
typedef void (*PWelsBlockZeroFunc) (int16_t* pBlock, int32_t iStride);
|
|
||||||
typedef void (*PWelsNonZeroCountFunc) (int16_t* pBlock, int8_t* pNonZeroCount);
|
typedef void (*PWelsNonZeroCountFunc) (int16_t* pBlock, int8_t* pNonZeroCount);
|
||||||
typedef void (*PWelsSimpleIdct4x4AddFunc) (int16_t* pDest, int16_t* pSrc, int32_t iStride);
|
|
||||||
|
|
||||||
typedef struct TagBlockFunc {
|
typedef struct TagBlockFunc {
|
||||||
PWelsBlockZeroFunc pWelsBlockZero16x16Func;
|
|
||||||
PWelsBlockZeroFunc pWelsBlockZero8x8Func;
|
|
||||||
PWelsNonZeroCountFunc pWelsSetNonZeroCountFunc;
|
PWelsNonZeroCountFunc pWelsSetNonZeroCountFunc;
|
||||||
} SBlockFunc;
|
} SBlockFunc;
|
||||||
|
|
||||||
|
|||||||
@@ -842,9 +842,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
|
|||||||
uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
|
uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
|
||||||
}
|
}
|
||||||
|
|
||||||
pCtx->sBlockFunc.pWelsBlockZero16x16Func (pCurLayer->pScaledTCoeff[iMbXy], 16);
|
memset(pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof(int16_t));
|
||||||
pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256, 8);
|
|
||||||
pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256 + 64, 8);
|
|
||||||
|
|
||||||
ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
|
ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
|
||||||
ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
|
ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
|
||||||
@@ -1043,43 +1041,15 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void WelsBlockInit (int16_t* pBlock, int32_t iWidth, int32_t iHeight, int32_t iStride, uint8_t uiVal) {
|
|
||||||
int32_t i;
|
|
||||||
int16_t* pDst = pBlock;
|
|
||||||
|
|
||||||
for (i = 0; i < iHeight; i++) {
|
|
||||||
memset (pDst, uiVal, iWidth * sizeof (int16_t));
|
|
||||||
pDst += iStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
|
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
|
||||||
pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_c;
|
|
||||||
pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_c;
|
|
||||||
pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_c;
|
pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_c;
|
||||||
|
|
||||||
#ifdef X86_ASM
|
|
||||||
if (iCpu & WELS_CPU_SSE2) {
|
|
||||||
pFunc->pWelsBlockZero16x16Func = WelsResBlockZero16x16_sse2;
|
|
||||||
pFunc->pWelsBlockZero8x8Func = WelsResBlockZero8x8_sse2;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_NEON
|
#ifdef HAVE_NEON
|
||||||
if ( iCpu & WELS_CPU_NEON ) {
|
if ( iCpu & WELS_CPU_NEON ) {
|
||||||
pFunc->pWelsBlockZero16x16Func = WelsResBlockZero16x16_neon;
|
|
||||||
pFunc->pWelsBlockZero8x8Func = WelsResBlockZero8x8_neon;
|
|
||||||
pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_neon;
|
pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_neon;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
|
|
||||||
WelsBlockInit (pBlock, 16, 16, iStride, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
|
|
||||||
WelsBlockInit (pBlock, 8, 8, iStride, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount) {
|
void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount) {
|
||||||
int32_t i;
|
int32_t i;
|
||||||
|
|||||||
@@ -1,151 +0,0 @@
|
|||||||
;*!
|
|
||||||
;* \copy
|
|
||||||
;* Copyright (c) 2009-2013, Cisco Systems
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* * Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;*
|
|
||||||
;* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in
|
|
||||||
;* the documentation and/or other materials provided with the
|
|
||||||
;* distribution.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* block_add.asm
|
|
||||||
;*
|
|
||||||
;* Abstract
|
|
||||||
;* add block
|
|
||||||
;*
|
|
||||||
;* History
|
|
||||||
;* 09/21/2009 Created
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;*************************************************************************/
|
|
||||||
|
|
||||||
%include "asm_inc.asm"
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Code
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; void WelsResBlockZero16x16_sse2(int16_t* pBlock,int32_t iStride)
|
|
||||||
;*******************************************************************************
|
|
||||||
WELS_EXTERN WelsResBlockZero16x16_sse2
|
|
||||||
%assign push_num 0
|
|
||||||
LOAD_2_PARA
|
|
||||||
PUSH_XMM 8
|
|
||||||
SIGN_EXTENSION r1, r1d
|
|
||||||
lea r1, [r1*2]
|
|
||||||
lea r2, [r1*3]
|
|
||||||
|
|
||||||
pxor xmm7, xmm7
|
|
||||||
|
|
||||||
; four lines
|
|
||||||
movdqa [r0], xmm7
|
|
||||||
movdqa [r0+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r1], xmm7
|
|
||||||
movdqa [r0+r1+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r1*2], xmm7
|
|
||||||
movdqa [r0+r1*2+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r2], xmm7
|
|
||||||
movdqa [r0+r2+10h], xmm7
|
|
||||||
|
|
||||||
; four lines
|
|
||||||
lea r0, [r0+r1*4]
|
|
||||||
movdqa [r0], xmm7
|
|
||||||
movdqa [r0+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r1], xmm7
|
|
||||||
movdqa [r0+r1+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r1*2], xmm7
|
|
||||||
movdqa [r0+r1*2+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r2], xmm7
|
|
||||||
movdqa [r0+r2+10h], xmm7
|
|
||||||
|
|
||||||
; four lines
|
|
||||||
lea r0, [r0+r1*4]
|
|
||||||
movdqa [r0], xmm7
|
|
||||||
movdqa [r0+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r1], xmm7
|
|
||||||
movdqa [r0+r1+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r1*2], xmm7
|
|
||||||
movdqa [r0+r1*2+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r2], xmm7
|
|
||||||
movdqa [r0+r2+10h], xmm7
|
|
||||||
|
|
||||||
; four lines
|
|
||||||
lea r0, [r0+r1*4]
|
|
||||||
movdqa [r0], xmm7
|
|
||||||
movdqa [r0+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r1], xmm7
|
|
||||||
movdqa [r0+r1+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r1*2], xmm7
|
|
||||||
movdqa [r0+r1*2+10h], xmm7
|
|
||||||
|
|
||||||
movdqa [r0+r2], xmm7
|
|
||||||
movdqa [r0+r2+10h], xmm7
|
|
||||||
|
|
||||||
POP_XMM
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; void WelsResBlockZero8x8_sse2(int16_t * pBlock, int32_t iStride)
|
|
||||||
;*******************************************************************************
|
|
||||||
WELS_EXTERN WelsResBlockZero8x8_sse2
|
|
||||||
%assign push_num 0
|
|
||||||
LOAD_2_PARA
|
|
||||||
PUSH_XMM 8
|
|
||||||
SIGN_EXTENSION r1, r1d
|
|
||||||
lea r1, [r1*2]
|
|
||||||
lea r2, [r1*3]
|
|
||||||
|
|
||||||
pxor xmm7, xmm7
|
|
||||||
|
|
||||||
movdqa [r0], xmm7
|
|
||||||
movdqa [r0+r1], xmm7
|
|
||||||
movdqa [r0+r1*2], xmm7
|
|
||||||
movdqa [r0+r2], xmm7
|
|
||||||
|
|
||||||
lea r0, [r0+r1*4]
|
|
||||||
movdqa [r0], xmm7
|
|
||||||
movdqa [r0+r1], xmm7
|
|
||||||
movdqa [r0+r1*2], xmm7
|
|
||||||
movdqa [r0+r2], xmm7
|
|
||||||
|
|
||||||
|
|
||||||
POP_XMM
|
|
||||||
ret
|
|
||||||
|
|
||||||
@@ -27,7 +27,6 @@ DECODER_OBJS += $(DECODER_CPP_SRCS:.cpp=.$(OBJ))
|
|||||||
|
|
||||||
ifeq ($(ASM_ARCH), x86)
|
ifeq ($(ASM_ARCH), x86)
|
||||||
DECODER_ASM_SRCS=\
|
DECODER_ASM_SRCS=\
|
||||||
$(DECODER_SRCDIR)/core/x86/block_add.asm\
|
|
||||||
$(DECODER_SRCDIR)/core/x86/dct.asm\
|
$(DECODER_SRCDIR)/core/x86/dct.asm\
|
||||||
$(DECODER_SRCDIR)/core/x86/intra_pred.asm\
|
$(DECODER_SRCDIR)/core/x86/intra_pred.asm\
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user