Merge pull request #967 from dongzha/Deblock_AArch64

Add ARM64 (AArch64) deblocking code and unit test code
This commit is contained in:
ruil2 2014-06-16 17:19:25 +08:00
commit 1111757977
9 changed files with 1214 additions and 0 deletions

View File

@ -21,6 +21,7 @@
F0B204F918FD23BF005DA23F /* copy_mb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F0B204F818FD23BF005DA23F /* copy_mb.cpp */; };
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8221906673900E156A8 /* arm_arch64_common_macro.S */; };
F556A8251906673900E156A8 /* expand_picture_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */; };
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */; };
F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5B8D82C190757290037849A /* mc_aarch64_neon.S */; };
FAABAA1818E9354A00D4186F /* sad_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = FAABAA1718E9354A00D4186F /* sad_common.cpp */; };
/* End PBXBuildFile section */
@ -66,6 +67,7 @@
F0B204F818FD23BF005DA23F /* copy_mb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = copy_mb.cpp; sourceTree = "<group>"; };
F556A8221906673900E156A8 /* arm_arch64_common_macro.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = arm_arch64_common_macro.S; path = arm64/arm_arch64_common_macro.S; sourceTree = "<group>"; };
F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = expand_picture_aarch64_neon.S; path = arm64/expand_picture_aarch64_neon.S; sourceTree = "<group>"; };
F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = deblocking_aarch64_neon.S; path = arm64/deblocking_aarch64_neon.S; sourceTree = "<group>"; };
F5B8D82C190757290037849A /* mc_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = mc_aarch64_neon.S; path = arm64/mc_aarch64_neon.S; sourceTree = "<group>"; };
FAABAA1618E9353F00D4186F /* sad_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sad_common.h; sourceTree = "<group>"; };
FAABAA1718E9354A00D4186F /* sad_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sad_common.cpp; sourceTree = "<group>"; };
@ -170,6 +172,7 @@
F556A81D1906669F00E156A8 /* arm64 */ = {
isa = PBXGroup;
children = (
F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */,
F5B8D82C190757290037849A /* mc_aarch64_neon.S */,
F556A8221906673900E156A8 /* arm_arch64_common_macro.S */,
F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */,
@ -231,6 +234,7 @@
F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */,
4C3406C918D96EA600DFA14A /* arm_arch_common_macro.S in Sources */,
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */,
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */,
4C3406CE18D96EA600DFA14A /* crt_util_safe_x.cpp in Sources */,
4C3406CF18D96EA600DFA14A /* deblocking_common.cpp in Sources */,
4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */,

View File

@ -460,6 +460,7 @@
"$(SRCROOT)/../../../../common/arm",
);
IPHONEOS_DEPLOYMENT_TARGET = 6.1;
ONLY_ACTIVE_ARCH = NO;
OTHER_LDFLAGS = "-ObjC";
PRODUCT_NAME = "$(TARGET_NAME)";
SKIP_INSTALL = YES;
@ -494,6 +495,7 @@
"$(SRCROOT)/../../../../common/arm",
);
IPHONEOS_DEPLOYMENT_TARGET = 6.1;
ONLY_ACTIVE_ARCH = NO;
OTHER_LDFLAGS = "-ObjC";
PRODUCT_NAME = "$(TARGET_NAME)";
SKIP_INSTALL = YES;

File diff suppressed because it is too large Load Diff

View File

@ -50,6 +50,18 @@ void DeblockChromaLt4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
void DeblockChromaEq4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
#endif
#if defined(HAVE_NEON_AARCH64)
// AArch64 NEON deblocking filter entry points; only declared when HAVE_NEON_AARCH64 is defined.
//  - Luma functions take a single plane pointer (pPixY); chroma functions take both pPixCb and pPixCr.
//  - The Lt4 variants take an additional int8_t* argument (pTc / pTC); the Eq4 variants do not.
//  - The V / H suffix pairs with the ...Ver / ...Hor function-pointer slots these are assigned to in DeblockingInit.
// NOTE(review): the implementations are presumably in arm64/deblocking_aarch64_neon.S -- confirm.
void DeblockLumaLt4V_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4V_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockLumaLt4H_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4H_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4V_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void DeblockChromaEq4V_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void DeblockChromaEq4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
#endif
#if defined(__cplusplus)
}
#endif//__cplusplus

View File

@ -38,6 +38,7 @@ endif
ifeq ($(ASM_ARCH), arm64)
COMMON_ASM_ARM64_SRCS=\
$(COMMON_SRCDIR)/arm64/deblocking_aarch64_neon.S\
$(COMMON_SRCDIR)/arm64/expand_picture_aarch64_neon.S\
$(COMMON_SRCDIR)/arm64/mc_aarch64_neon.S\

View File

@ -732,6 +732,20 @@ void DeblockingInit (SDeblockingFunc* pFunc, int32_t iCpu) {
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_neon;
}
#endif
#if defined(HAVE_NEON_AARCH64)
if (iCpu & WELS_CPU_NEON) {
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_AArch64_neon;
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_AArch64_neon;
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_AArch64_neon;
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_AArch64_neon;
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon;
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon;
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon;
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon;
}
#endif
}
} // namespace WelsDec

View File

@ -69,6 +69,10 @@ void WelsNonZeroCount_neon (int8_t* pNonZeroCount);
void DeblockingBSCalcEnc_neon (int8_t* pNzc, SMVUnitXY* pMv, int32_t iBoundryFlag, int32_t iMbStride,
uint8_t (*pBS)[4][4]);
#endif
#if defined(HAVE_NEON_AARCH64)
// AArch64 NEON helpers; only declared when HAVE_NEON_AARCH64 is defined.
// Parameter lists match the WelsNonZeroCount_neon / DeblockingBSCalcEnc_neon prototypes declared just above.
// WelsNonZeroCount_AArch64_neon is installed as the *pfSetNZCZero callback by WelsBlockFuncInit.
void WelsNonZeroCount_AArch64_neon (int8_t* pNonZeroCount);
// Called by DeblockingBSCalc_AArch64_neon with the current macroblock's pNonZeroCount, sMv and uiNeighborAvail,
// plus the macroblock stride and the uiBS output table.
void DeblockingBSCalcEnc_AArch64_neon (int8_t* pNzc, SMVUnitXY* pMv, int32_t iBoundryFlag, int32_t iMbStride, uint8_t (*pBS)[4][4]);
#endif
#if defined(__cplusplus)
}
#endif//__cplusplus

View File

@ -573,6 +573,27 @@ void DeblockingBSCalc_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2
}
#endif
#if defined(HAVE_NEON_AARCH64) && defined(SINGLE_REF_FRAME)
/*
 * AArch64 NEON variant of the per-macroblock uiBS computation.
 *
 * The whole uiBS table is first handed to DeblockingBSCalcEnc_AArch64_neon
 * together with the current macroblock's non-zero counts, motion vectors and
 * neighbour-availability flags.  Afterwards the first row of each direction
 * (uiBS[0][0] for the left edge, uiBS[1][0] for the top edge) is patched:
 *   - edge flag is zero            -> the four bytes are cleared;
 *   - neighbouring MB is IS_INTRA  -> each of the four bytes is set to 4;
 *   - otherwise                    -> the values written by the NEON routine are kept.
 *
 * pFunc and uiCurMbType are not used by this implementation.
 */
void DeblockingBSCalc_AArch64_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
                                    int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
  // Each edge row is four bytes wide and is overwritten with one 32-bit store.
  uint32_t* const pLeftEdgeBS = (uint32_t*)uiBS[0][0];
  uint32_t* const pTopEdgeBS  = (uint32_t*)uiBS[1][0];

  DeblockingBSCalcEnc_AArch64_neon (pCurMb->pNonZeroCount, pCurMb->sMv, pCurMb->uiNeighborAvail, iMbStride, uiBS);

  // Left edge: the neighbour is the previous macroblock (pCurMb - 1); it is only inspected when iLeftFlag is set.
  if (!iLeftFlag) {
    *pLeftEdgeBS = 0;
  } else if (IS_INTRA ((pCurMb - 1)->uiMbType)) {
    *pLeftEdgeBS = 0x04040404;
  }

  // Top edge: the neighbour is one macroblock row up (pCurMb - iMbStride); it is only inspected when iTopFlag is set.
  if (!iTopFlag) {
    *pTopEdgeBS = 0;
  } else if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) {
    *pTopEdgeBS = 0x04040404;
  }
}
#endif
void DeblockingBSCalc_c (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
if (iLeftFlag) {
@ -765,6 +786,11 @@ void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu)
*pfSetNZCZero = WelsNonZeroCount_neon;
}
#endif
#ifdef HAVE_NEON_AARCH64
if (iCpu & WELS_CPU_NEON) {
*pfSetNZCZero = WelsNonZeroCount_AArch64_neon;
}
#endif
}
void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
@ -811,6 +837,24 @@ void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
#endif
}
#endif
#if defined(HAVE_NEON_AARCH64)
if (iCpu & WELS_CPU_NEON) {
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_AArch64_neon;
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_AArch64_neon;
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_AArch64_neon;
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_AArch64_neon;
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon;
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon;
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon;
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon;
#if defined(SINGLE_REF_FRAME)
pFunc->pfDeblockingBSCalc = DeblockingBSCalc_AArch64_neon;
#endif
}
#endif
}

View File

@ -127,3 +127,20 @@ GENERATE_CHROMA_UT (ChromaLt4H_neon, DeblockChromaLt4H_neon, DeblockChromaLt4H_c
GENERATE_CHROMA_UT (ChromaEq4V_neon, DeblockChromaEq4V_neon_wrap, DeblockChromaEq4V_c_wrap, WELS_CPU_NEON, 0)
GENERATE_CHROMA_UT (ChromaEq4H_neon, DeblockChromaEq4H_neon_wrap, DeblockChromaEq4H_c_wrap, WELS_CPU_NEON, 1)
#endif
#if defined(HAVE_NEON_AARCH64)
// Unit tests for the AArch64 NEON deblocking filters; each test pairs a NEON function with its C counterpart.
// NOTE(review): WRAP_LUMA_FUNC / WRAP_CHROMA_FUNC presumably generate the `<name>_wrap` adapters used below,
// so the Eq4 functions (which have no pTc argument) fit the signature the UT macros expect -- confirm against
// the macro definitions earlier in this file.
WRAP_LUMA_FUNC (DeblockLumaEq4V_AArch64_neon)
WRAP_LUMA_FUNC (DeblockLumaEq4H_AArch64_neon)
WRAP_CHROMA_FUNC (DeblockChromaEq4V_AArch64_neon)
WRAP_CHROMA_FUNC (DeblockChromaEq4H_AArch64_neon)
// The last macro argument is 0 for every V test and 1 for every H test.
// NOTE(review): presumably a vertical/horizontal selector -- confirm in GENERATE_LUMA_UT / GENERATE_CHROMA_UT.
GENERATE_LUMA_UT (LumaLt4V_AArch64_neon, DeblockLumaLt4V_AArch64_neon, DeblockLumaLt4V_c, WELS_CPU_NEON, 0)
GENERATE_LUMA_UT (LumaLt4H_AArch64_neon, DeblockLumaLt4H_AArch64_neon, DeblockLumaLt4H_c, WELS_CPU_NEON, 1)
GENERATE_LUMA_UT (LumaEq4V_AArch64_neon, DeblockLumaEq4V_AArch64_neon_wrap, DeblockLumaEq4V_c_wrap, WELS_CPU_NEON, 0)
GENERATE_LUMA_UT (LumaEq4H_AArch64_neon, DeblockLumaEq4H_AArch64_neon_wrap, DeblockLumaEq4H_c_wrap, WELS_CPU_NEON, 1)
GENERATE_CHROMA_UT (ChromaLt4V_AArch64_neon, DeblockChromaLt4V_AArch64_neon, DeblockChromaLt4V_c, WELS_CPU_NEON, 0)
GENERATE_CHROMA_UT (ChromaLt4H_AArch64_neon, DeblockChromaLt4H_AArch64_neon, DeblockChromaLt4H_c, WELS_CPU_NEON, 1)
GENERATE_CHROMA_UT (ChromaEq4V_AArch64_neon, DeblockChromaEq4V_AArch64_neon_wrap, DeblockChromaEq4V_c_wrap, WELS_CPU_NEON, 0)
GENERATE_CHROMA_UT (ChromaEq4H_AArch64_neon, DeblockChromaEq4H_AArch64_neon_wrap, DeblockChromaEq4H_c_wrap, WELS_CPU_NEON, 1)
#endif