Merge pull request #967 from dongzha/Deblock_AArch64
add arm 64 deblock code and Unit Test code
This commit is contained in:
commit
1111757977
@ -21,6 +21,7 @@
|
||||
F0B204F918FD23BF005DA23F /* copy_mb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F0B204F818FD23BF005DA23F /* copy_mb.cpp */; };
|
||||
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8221906673900E156A8 /* arm_arch64_common_macro.S */; };
|
||||
F556A8251906673900E156A8 /* expand_picture_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */; };
|
||||
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */; };
|
||||
F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5B8D82C190757290037849A /* mc_aarch64_neon.S */; };
|
||||
FAABAA1818E9354A00D4186F /* sad_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = FAABAA1718E9354A00D4186F /* sad_common.cpp */; };
|
||||
/* End PBXBuildFile section */
|
||||
@ -66,6 +67,7 @@
|
||||
F0B204F818FD23BF005DA23F /* copy_mb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = copy_mb.cpp; sourceTree = "<group>"; };
|
||||
F556A8221906673900E156A8 /* arm_arch64_common_macro.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = arm_arch64_common_macro.S; path = arm64/arm_arch64_common_macro.S; sourceTree = "<group>"; };
|
||||
F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = expand_picture_aarch64_neon.S; path = arm64/expand_picture_aarch64_neon.S; sourceTree = "<group>"; };
|
||||
F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = deblocking_aarch64_neon.S; path = arm64/deblocking_aarch64_neon.S; sourceTree = "<group>"; };
|
||||
F5B8D82C190757290037849A /* mc_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = mc_aarch64_neon.S; path = arm64/mc_aarch64_neon.S; sourceTree = "<group>"; };
|
||||
FAABAA1618E9353F00D4186F /* sad_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sad_common.h; sourceTree = "<group>"; };
|
||||
FAABAA1718E9354A00D4186F /* sad_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sad_common.cpp; sourceTree = "<group>"; };
|
||||
@ -170,6 +172,7 @@
|
||||
F556A81D1906669F00E156A8 /* arm64 */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */,
|
||||
F5B8D82C190757290037849A /* mc_aarch64_neon.S */,
|
||||
F556A8221906673900E156A8 /* arm_arch64_common_macro.S */,
|
||||
F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */,
|
||||
@ -231,6 +234,7 @@
|
||||
F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */,
|
||||
4C3406C918D96EA600DFA14A /* arm_arch_common_macro.S in Sources */,
|
||||
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */,
|
||||
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */,
|
||||
4C3406CE18D96EA600DFA14A /* crt_util_safe_x.cpp in Sources */,
|
||||
4C3406CF18D96EA600DFA14A /* deblocking_common.cpp in Sources */,
|
||||
4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */,
|
||||
|
@ -460,6 +460,7 @@
|
||||
"$(SRCROOT)/../../../../common/arm",
|
||||
);
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 6.1;
|
||||
ONLY_ACTIVE_ARCH = NO;
|
||||
OTHER_LDFLAGS = "-ObjC";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SKIP_INSTALL = YES;
|
||||
@ -494,6 +495,7 @@
|
||||
"$(SRCROOT)/../../../../common/arm",
|
||||
);
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 6.1;
|
||||
ONLY_ACTIVE_ARCH = NO;
|
||||
OTHER_LDFLAGS = "-ObjC";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SKIP_INSTALL = YES;
|
||||
|
1116
codec/common/arm64/deblocking_aarch64_neon.S
Normal file
1116
codec/common/arm64/deblocking_aarch64_neon.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -50,6 +50,18 @@ void DeblockChromaLt4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
|
||||
void DeblockChromaEq4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_NEON_AARCH64)
|
||||
void DeblockLumaLt4V_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
|
||||
void DeblockLumaEq4V_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
||||
void DeblockLumaLt4H_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
|
||||
void DeblockLumaEq4H_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
||||
void DeblockChromaLt4V_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
|
||||
int8_t* pTC);
|
||||
void DeblockChromaEq4V_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
||||
void DeblockChromaLt4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
|
||||
int8_t* pTC);
|
||||
void DeblockChromaEq4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
||||
#endif
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif//__cplusplus
|
||||
|
@ -38,6 +38,7 @@ endif
|
||||
|
||||
ifeq ($(ASM_ARCH), arm64)
|
||||
COMMON_ASM_ARM64_SRCS=\
|
||||
$(COMMON_SRCDIR)/arm64/deblocking_aarch64_neon.S\
|
||||
$(COMMON_SRCDIR)/arm64/expand_picture_aarch64_neon.S\
|
||||
$(COMMON_SRCDIR)/arm64/mc_aarch64_neon.S\
|
||||
|
||||
|
@ -732,6 +732,20 @@ void DeblockingInit (SDeblockingFunc* pFunc, int32_t iCpu) {
|
||||
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_neon;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_NEON_AARCH64)
|
||||
if (iCpu & WELS_CPU_NEON) {
|
||||
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_AArch64_neon;
|
||||
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_AArch64_neon;
|
||||
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_AArch64_neon;
|
||||
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_AArch64_neon;
|
||||
|
||||
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon;
|
||||
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon;
|
||||
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon;
|
||||
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace WelsDec
|
||||
|
@ -69,6 +69,10 @@ void WelsNonZeroCount_neon (int8_t* pNonZeroCount);
|
||||
void DeblockingBSCalcEnc_neon (int8_t* pNzc, SMVUnitXY* pMv, int32_t iBoundryFlag, int32_t iMbStride,
|
||||
uint8_t (*pBS)[4][4]);
|
||||
#endif
|
||||
#if defined(HAVE_NEON_AARCH64)
|
||||
void WelsNonZeroCount_AArch64_neon (int8_t* pNonZeroCount);
|
||||
void DeblockingBSCalcEnc_AArch64_neon (int8_t* pNzc, SMVUnitXY* pMv, int32_t iBoundryFlag, int32_t iMbStride, uint8_t (*pBS)[4][4]);
|
||||
#endif
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif//__cplusplus
|
||||
|
@ -573,6 +573,27 @@ void DeblockingBSCalc_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_NEON_AARCH64) && defined(SINGLE_REF_FRAME)
|
||||
void DeblockingBSCalc_AArch64_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
|
||||
int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
|
||||
DeblockingBSCalcEnc_AArch64_neon (pCurMb->pNonZeroCount, pCurMb->sMv, pCurMb->uiNeighborAvail, iMbStride, uiBS);
|
||||
if (iLeftFlag) {
|
||||
if (IS_INTRA ((pCurMb - 1)->uiMbType)) {
|
||||
* (uint32_t*)uiBS[0][0] = 0x04040404;
|
||||
}
|
||||
} else {
|
||||
* (uint32_t*)uiBS[0][0] = 0;
|
||||
}
|
||||
if (iTopFlag) {
|
||||
if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) {
|
||||
* (uint32_t*)uiBS[1][0] = 0x04040404;
|
||||
}
|
||||
} else {
|
||||
* (uint32_t*)uiBS[1][0] = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void DeblockingBSCalc_c (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
|
||||
int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
|
||||
if (iLeftFlag) {
|
||||
@ -765,6 +786,11 @@ void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu)
|
||||
*pfSetNZCZero = WelsNonZeroCount_neon;
|
||||
}
|
||||
#endif
|
||||
#ifdef HAVE_NEON_AARCH64
|
||||
if (iCpu & WELS_CPU_NEON) {
|
||||
*pfSetNZCZero = WelsNonZeroCount_AArch64_neon;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
|
||||
@ -811,6 +837,24 @@ void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_NEON_AARCH64)
|
||||
if (iCpu & WELS_CPU_NEON) {
|
||||
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_AArch64_neon;
|
||||
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_AArch64_neon;
|
||||
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_AArch64_neon;
|
||||
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_AArch64_neon;
|
||||
|
||||
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon;
|
||||
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon;
|
||||
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon;
|
||||
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon;
|
||||
|
||||
#if defined(SINGLE_REF_FRAME)
|
||||
pFunc->pfDeblockingBSCalc = DeblockingBSCalc_AArch64_neon;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -127,3 +127,20 @@ GENERATE_CHROMA_UT (ChromaLt4H_neon, DeblockChromaLt4H_neon, DeblockChromaLt4H_c
|
||||
GENERATE_CHROMA_UT (ChromaEq4V_neon, DeblockChromaEq4V_neon_wrap, DeblockChromaEq4V_c_wrap, WELS_CPU_NEON, 0)
|
||||
GENERATE_CHROMA_UT (ChromaEq4H_neon, DeblockChromaEq4H_neon_wrap, DeblockChromaEq4H_c_wrap, WELS_CPU_NEON, 1)
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_NEON_AARCH64)
|
||||
WRAP_LUMA_FUNC (DeblockLumaEq4V_AArch64_neon)
|
||||
WRAP_LUMA_FUNC (DeblockLumaEq4H_AArch64_neon)
|
||||
WRAP_CHROMA_FUNC (DeblockChromaEq4V_AArch64_neon)
|
||||
WRAP_CHROMA_FUNC (DeblockChromaEq4H_AArch64_neon)
|
||||
|
||||
GENERATE_LUMA_UT (LumaLt4V_AArch64_neon, DeblockLumaLt4V_AArch64_neon, DeblockLumaLt4V_c, WELS_CPU_NEON, 0)
|
||||
GENERATE_LUMA_UT (LumaLt4H_AArch64_neon, DeblockLumaLt4H_AArch64_neon, DeblockLumaLt4H_c, WELS_CPU_NEON, 1)
|
||||
GENERATE_LUMA_UT (LumaEq4V_AArch64_neon, DeblockLumaEq4V_AArch64_neon_wrap, DeblockLumaEq4V_c_wrap, WELS_CPU_NEON, 0)
|
||||
GENERATE_LUMA_UT (LumaEq4H_AArch64_neon, DeblockLumaEq4H_AArch64_neon_wrap, DeblockLumaEq4H_c_wrap, WELS_CPU_NEON, 1)
|
||||
|
||||
GENERATE_CHROMA_UT (ChromaLt4V_AArch64_neon, DeblockChromaLt4V_AArch64_neon, DeblockChromaLt4V_c, WELS_CPU_NEON, 0)
|
||||
GENERATE_CHROMA_UT (ChromaLt4H_AArch64_neon, DeblockChromaLt4H_AArch64_neon, DeblockChromaLt4H_c, WELS_CPU_NEON, 1)
|
||||
GENERATE_CHROMA_UT (ChromaEq4V_AArch64_neon, DeblockChromaEq4V_AArch64_neon_wrap, DeblockChromaEq4V_c_wrap, WELS_CPU_NEON, 0)
|
||||
GENERATE_CHROMA_UT (ChromaEq4H_AArch64_neon, DeblockChromaEq4H_AArch64_neon_wrap, DeblockChromaEq4H_c_wrap, WELS_CPU_NEON, 1)
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user