Merge pull request #439 from zhilwang/mc-arm-asm

mv mc_neon.S to common,add MC arm code to encoder
This commit is contained in:
Licai Guo 2014-03-07 16:36:48 +08:00
commit 1b9aae8434
10 changed files with 902 additions and 2147 deletions

View File

@ -8,6 +8,7 @@
/* Begin PBXBuildFile section */
4C34067D18C5C94C00DFA14A /* expand_picture_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4C34067C18C5C94C00DFA14A /* expand_picture_neon.S */; };
4C34069B18C96CB000DFA14A /* mc_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4C34069A18C96CB000DFA14A /* mc_neon.S */; };
4CE443D918B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; };
4CE443E718B722CD0017DF25 /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443E618B722CD0017DF25 /* XCTest.framework */; };
4CE443E818B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; };
@ -48,6 +49,7 @@
/* Begin PBXFileReference section */
4C34067C18C5C94C00DFA14A /* expand_picture_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = expand_picture_neon.S; sourceTree = "<group>"; };
4C34069A18C96CB000DFA14A /* mc_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = mc_neon.S; sourceTree = "<group>"; };
4CE443D518B722CD0017DF25 /* libcommon.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libcommon.a; sourceTree = BUILT_PRODUCTS_DIR; };
4CE443D818B722CD0017DF25 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
4CE443E518B722CD0017DF25 /* commonTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = commonTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
@ -150,6 +152,7 @@
4CE4472F18BC61650017DF25 /* common */ = {
isa = PBXGroup;
children = (
4C34069A18C96CB000DFA14A /* mc_neon.S */,
4C34067C18C5C94C00DFA14A /* expand_picture_neon.S */,
4CE447BE18C085900017DF25 /* arm_arch_common_macro.S */,
4CE447BC18C085320017DF25 /* deblocking_neon.S */,
@ -256,6 +259,7 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
4C34069B18C96CB000DFA14A /* mc_neon.S in Sources */,
4CE447BF18C085900017DF25 /* arm_arch_common_macro.S in Sources */,
4CE4475018BC61650017DF25 /* deblocking_common.cpp in Sources */,
4CE4474C18BC61650017DF25 /* cpu.cpp in Sources */,

View File

@ -38,7 +38,6 @@
4CE4469F18BC5EAB0017DF25 /* welsDecoderExt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4468518BC5EAB0017DF25 /* welsDecoderExt.cpp */; };
4CE447AC18BC6BE90017DF25 /* block_add_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4CE447A718BC6BE90017DF25 /* block_add_neon.S */; };
4CE447AE18BC6BE90017DF25 /* intra_pred_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4CE447A918BC6BE90017DF25 /* intra_pred_neon.S */; };
4CE447AF18BC6BE90017DF25 /* mc_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4CE447AA18BC6BE90017DF25 /* mc_neon.S */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
@ -132,7 +131,6 @@
4CE4468518BC5EAB0017DF25 /* welsDecoderExt.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = welsDecoderExt.cpp; sourceTree = "<group>"; };
4CE447A718BC6BE90017DF25 /* block_add_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = block_add_neon.S; sourceTree = "<group>"; };
4CE447A918BC6BE90017DF25 /* intra_pred_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = intra_pred_neon.S; sourceTree = "<group>"; };
4CE447AA18BC6BE90017DF25 /* mc_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = mc_neon.S; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -325,7 +323,6 @@
children = (
4CE447A718BC6BE90017DF25 /* block_add_neon.S */,
4CE447A918BC6BE90017DF25 /* intra_pred_neon.S */,
4CE447AA18BC6BE90017DF25 /* mc_neon.S */,
);
path = arm;
sourceTree = "<group>";
@ -411,7 +408,6 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
4CE447AF18BC6BE90017DF25 /* mc_neon.S in Sources */,
4CE4469B18BC5EAB0017DF25 /* pic_queue.cpp in Sources */,
4CE4469F18BC5EAB0017DF25 /* welsDecoderExt.cpp in Sources */,
4CE4469318BC5EAB0017DF25 /* fmo.cpp in Sources */,

View File

@ -9,7 +9,6 @@
/* Begin PBXBuildFile section */
4C34066D18C57D0400DFA14A /* intra_pred_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4C34066618C57D0400DFA14A /* intra_pred_neon.S */; };
4C34066E18C57D0400DFA14A /* intra_pred_sad_3_opt_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4C34066718C57D0400DFA14A /* intra_pred_sad_3_opt_neon.S */; };
4C34066F18C57D0400DFA14A /* mc_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4C34066818C57D0400DFA14A /* mc_neon.S */; };
4C34067018C57D0400DFA14A /* memory_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4C34066918C57D0400DFA14A /* memory_neon.S */; };
4C34067118C57D0400DFA14A /* pixel_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4C34066A18C57D0400DFA14A /* pixel_neon.S */; };
4C34067218C57D0400DFA14A /* reconstruct_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4C34066B18C57D0400DFA14A /* reconstruct_neon.S */; };
@ -79,7 +78,6 @@
/* Begin PBXFileReference section */
4C34066618C57D0400DFA14A /* intra_pred_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = intra_pred_neon.S; sourceTree = "<group>"; };
4C34066718C57D0400DFA14A /* intra_pred_sad_3_opt_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = intra_pred_sad_3_opt_neon.S; sourceTree = "<group>"; };
4C34066818C57D0400DFA14A /* mc_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = mc_neon.S; sourceTree = "<group>"; };
4C34066918C57D0400DFA14A /* memory_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = memory_neon.S; sourceTree = "<group>"; };
4C34066A18C57D0400DFA14A /* pixel_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = pixel_neon.S; sourceTree = "<group>"; };
4C34066B18C57D0400DFA14A /* reconstruct_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = reconstruct_neon.S; sourceTree = "<group>"; };
@ -206,7 +204,6 @@
children = (
4C34066618C57D0400DFA14A /* intra_pred_neon.S */,
4C34066718C57D0400DFA14A /* intra_pred_sad_3_opt_neon.S */,
4C34066818C57D0400DFA14A /* mc_neon.S */,
4C34066918C57D0400DFA14A /* memory_neon.S */,
4C34066A18C57D0400DFA14A /* pixel_neon.S */,
4C34066B18C57D0400DFA14A /* reconstruct_neon.S */,
@ -492,7 +489,6 @@
4CE4472A18BC605C0017DF25 /* utils.cpp in Sources */,
4CE4471018BC605C0017DF25 /* decode_mb_aux.cpp in Sources */,
4CE4472018BC605C0017DF25 /* sample.cpp in Sources */,
4C34066F18C57D0400DFA14A /* mc_neon.S in Sources */,
4CE4472D18BC605C0017DF25 /* welsCodecTrace.cpp in Sources */,
4CE4471318BC605C0017DF25 /* encoder_data_tables.cpp in Sources */,
4C34067118C57D0400DFA14A /* pixel_neon.S in Sources */,

View File

@ -82,6 +82,18 @@ void McHorVer02WidthEq4_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* p
void McHorVer22WidthEq16_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer22WidthEq8_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer22WidthEq4_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void PixStrideAvgWidthEq16_neon(uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
void PixStrideAvgWidthEq8_neon(uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
void McHorVer20Width17_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);// width+1
void McHorVer20Width9_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);// width+1
void McHorVer02Height17_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);// height+1
void McHorVer02Height9_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);// height+1
void McHorVer22Width17_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);//width+1&&height+1
void McHorVer22Width9_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);//width+1&&height+1
#endif
#if defined(X86_ASM)

File diff suppressed because it is too large Load Diff

View File

@ -26,6 +26,7 @@ ifeq ($(ASM_ARCH), arm)
COMMON_ASM_S_SRCS=\
$(COMMON_SRCDIR)/deblocking_neon.S\
$(COMMON_SRCDIR)/expand_picture_neon.S\
$(COMMON_SRCDIR)/mc_neon.S\
COMMON_OBJS += $(COMMON_ASM_S_SRCS:.S=.o)
endif

File diff suppressed because it is too large Load Diff

View File

@ -38,7 +38,6 @@ ifeq ($(ASM_ARCH), arm)
DECODER_ASM_S_SRCS=\
$(DECODER_SRCDIR)/core/arm/block_add_neon.S\
$(DECODER_SRCDIR)/core/arm/intra_pred_neon.S\
$(DECODER_SRCDIR)/core/arm/mc_neon.S\
DECODER_OBJS += $(DECODER_ASM_S_SRCS:.S=.o)
endif

View File

@ -480,6 +480,99 @@ void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int
}
#endif //X86_ASM
//***************************************************************************//
// NEON implementation //
//***************************************************************************//
#if defined(HAVE_NEON)
void McHorVer20Width9Or17_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight) {
if (iWidth == 17)
McHorVer20Width17_neon(pSrc, iSrcStride, pDst, iDstStride, iHeight);
else //if (iWidth == 9)
McHorVer20Width9_neon(pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
void McHorVer02Height9Or17_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight){
if (iWidth == 16)
McHorVer02Height17_neon(pSrc, iSrcStride, pDst, iDstStride, iHeight);
else //if (iWidth == 8)
McHorVer02Height9_neon(pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
void McHorVer22Width9Or17Height9Or17_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight){
if (iWidth == 17)
McHorVer22Width17_neon(pSrc, iSrcStride, pDst, iDstStride, iHeight);
else //if (iWidth == 9)
McHorVer22Width9_neon(pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
void EncMcHorVer11_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D(uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_neon(pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_neon(pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_neon(pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
void EncMcHorVer12_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D(uint8_t, pTmp, 512, 16)
McHorVer02WidthEq16_neon(pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_neon(pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_neon(pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
void EncMcHorVer13_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D(uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_neon(pSrc+iSrcStride, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_neon(pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_neon(pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
void EncMcHorVer21_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D(uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_neon(pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_neon(pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_neon(pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
void EncMcHorVer23_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D(uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_neon(pSrc+iSrcStride, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_neon(pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_neon(pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
void EncMcHorVer31_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D(uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_neon(pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_neon(pSrc+1, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_neon(pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
void EncMcHorVer32_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D(uint8_t, pTmp, 512, 16)
McHorVer02WidthEq16_neon(pSrc+1, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_neon(pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_neon(pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
void EncMcHorVer33_neon(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D(uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_neon(pSrc+iSrcStride, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_neon(pSrc+1, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_neon(pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
void EncMcChroma_neon(const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, int32_t iDstStride,
SMVUnitXY sMv, int32_t iWidth, int32_t iHeight) {
const int32_t kiD8x = sMv.iMvX&0x07;
const int32_t kiD8y = sMv.iMvY&0x07;
if (0 == kiD8x && 0 == kiD8y) {
if(8 == iWidth)
McCopyWidthEq8_neon(pSrc, iSrcStride, pDst, iDstStride, iHeight);
else // iWidth == 4
McCopyWidthEq4_neon(pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
else {
if(8 == iWidth)
McChromaWidthEq8_neon(pSrc, iSrcStride, pDst, iDstStride, (int32_t*)(g_kuiABCD[kiD8y][kiD8x]), iHeight);
else //if(4 == iWidth)
McChromaWidthEq4_neon(pSrc, iSrcStride, pDst, iDstStride, (int32_t*)(g_kuiABCD[kiD8y][kiD8x]), iHeight);
}
}
#endif
typedef void (*PixelAvgFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t);
void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
static PixelAvgFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
@ -498,6 +591,14 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
McHorVer03WidthEq16, McHorVer13WidthEq16, McHorVer23WidthEq16, McHorVer33WidthEq16
};
#endif
#if defined(HAVE_NEON)
static PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { //[x][y]
McCopyWidthEq16_neon, McHorVer10WidthEq16_neon, McHorVer20WidthEq16_neon, McHorVer30WidthEq16_neon,
McHorVer01WidthEq16_neon, EncMcHorVer11_neon, EncMcHorVer21_neon, EncMcHorVer31_neon,
McHorVer02WidthEq16_neon, EncMcHorVer12_neon, McHorVer22WidthEq16_neon, EncMcHorVer32_neon,
McHorVer03WidthEq16_neon, EncMcHorVer13_neon, EncMcHorVer23_neon, EncMcHorVer33_neon
};
#endif
pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20_c;
pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02_c;
@ -538,5 +639,17 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
}
#endif //(X86_ASM)
#if defined(HAVE_NEON)
if (uiCpuFlag & WELS_CPU_NEON) {
pFuncList->sMcFuncs.pfLumaQuarpelMc = pWelsMcFuncWidthEq16_neon;
pFuncList->sMcFuncs.pfChromaMc = EncMcChroma_neon;
pFuncList->sMcFuncs.pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon;
pFuncList->sMcFuncs.pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon;
pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16
pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16
pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1
}
#endif
}
}

View File

@ -51,7 +51,6 @@ ifeq ($(ASM_ARCH), arm)
ENCODER_ASM_S_SRCS=\
$(ENCODER_SRCDIR)/core/arm/intra_pred_neon.S\
$(ENCODER_SRCDIR)/core/arm/intra_pred_sad_3_opt_neon.S\
$(ENCODER_SRCDIR)/core/arm/mc_neon.S\
$(ENCODER_SRCDIR)/core/arm/memory_neon.S\
$(ENCODER_SRCDIR)/core/arm/pixel_neon.S\
$(ENCODER_SRCDIR)/core/arm/reconstruct_neon.S\