diff --git a/codec/build/iOS/common/common.xcodeproj/project.pbxproj b/codec/build/iOS/common/common.xcodeproj/project.pbxproj index f6b13f37..e65bbae3 100644 --- a/codec/build/iOS/common/common.xcodeproj/project.pbxproj +++ b/codec/build/iOS/common/common.xcodeproj/project.pbxproj @@ -19,6 +19,7 @@ 4CE443D918B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; }; 53C1C9BC193F0FB000404D8F /* expand_pic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 53C1C9BB193F0FB000404D8F /* expand_pic.cpp */; }; 5BA8F2C019603F5F00011CE4 /* common_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5BA8F2BF19603F5F00011CE4 /* common_tables.cpp */; }; + 5BDD15ED1A79027600B6CA2E /* mc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5BDD15EC1A79027600B6CA2E /* mc.cpp */; }; F0B204F918FD23BF005DA23F /* copy_mb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F0B204F818FD23BF005DA23F /* copy_mb.cpp */; }; F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8221906673900E156A8 /* arm_arch64_common_macro.S */; }; F556A8251906673900E156A8 /* expand_picture_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */; }; @@ -54,7 +55,6 @@ 4C3406BA18D96EA600DFA14A /* deblocking_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deblocking_common.h; sourceTree = ""; }; 4C3406BD18D96EA600DFA14A /* ls_defines.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ls_defines.h; sourceTree = ""; }; 4C3406BE18D96EA600DFA14A /* macros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = macros.h; sourceTree = ""; }; - 4C3406BF18D96EA600DFA14A /* mc_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc_common.h; sourceTree = ""; }; 4C3406C018D96EA600DFA14A /* measure_time.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = measure_time.h; sourceTree = ""; }; 4C3406C118D96EA600DFA14A /* typedefs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = typedefs.h; sourceTree = ""; }; 4C3406C218D96EA600DFA14A /* WelsThreadLib.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WelsThreadLib.h; sourceTree = ""; }; @@ -70,6 +70,8 @@ 53C1C9BB193F0FB000404D8F /* expand_pic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = expand_pic.cpp; sourceTree = ""; }; 5BA8F2BE19603F3500011CE4 /* wels_common_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = wels_common_defs.h; sourceTree = ""; }; 5BA8F2BF19603F5F00011CE4 /* common_tables.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = common_tables.cpp; sourceTree = ""; }; + 5BDD15EB1A79026A00B6CA2E /* mc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc.h; sourceTree = ""; }; + 5BDD15EC1A79027600B6CA2E /* mc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mc.cpp; sourceTree = ""; }; F0B204F718FD23B6005DA23F /* copy_mb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = copy_mb.h; sourceTree = ""; }; F0B204F818FD23BF005DA23F /* copy_mb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = copy_mb.cpp; sourceTree = ""; }; F556A8221906673900E156A8 /* arm_arch64_common_macro.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = arm_arch64_common_macro.S; path = arm64/arm_arch64_common_macro.S; sourceTree = ""; }; @@ -123,7 +125,7 @@ 4C3406BA18D96EA600DFA14A /* deblocking_common.h */, 4C3406BD18D96EA600DFA14A /* ls_defines.h */, 4C3406BE18D96EA600DFA14A /* macros.h */, - 4C3406BF18D96EA600DFA14A /* mc_common.h */, + 5BDD15EB1A79026A00B6CA2E /* mc.h */, 4C3406C018D96EA600DFA14A /* measure_time.h */, 4C3406C118D96EA600DFA14A /* typedefs.h */, 5BA8F2BE19603F3500011CE4 /* wels_common_defs.h */, @@ -143,6 +145,7 @@ 4C3406C518D96EA600DFA14A /* crt_util_safe_x.cpp */, 53C1C9BB193F0FB000404D8F /* expand_pic.cpp */, 4C3406C618D96EA600DFA14A /* deblocking_common.cpp */, + 5BDD15EC1A79027600B6CA2E /* mc.cpp */, 4C3406C818D96EA600DFA14A /* WelsThreadLib.cpp */, ); path = src; @@ -253,6 +256,7 @@ F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */, 4C3406C918D96EA600DFA14A /* arm_arch_common_macro.S in Sources */, F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */, + 5BDD15ED1A79027600B6CA2E /* mc.cpp in Sources */, F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */, 4C3406CE18D96EA600DFA14A /* crt_util_safe_x.cpp in Sources */, F791965919D3BE2200F60C6B /* intra_pred_common.cpp in Sources */, diff --git a/codec/build/iOS/dec/welsdec/welsdec.xcodeproj/project.pbxproj b/codec/build/iOS/dec/welsdec/welsdec.xcodeproj/project.pbxproj index 232ba4cc..ec8662f5 100644 --- a/codec/build/iOS/dec/welsdec/welsdec.xcodeproj/project.pbxproj +++ b/codec/build/iOS/dec/welsdec/welsdec.xcodeproj/project.pbxproj @@ -20,7 +20,6 @@ 4CE4469318BC5EAB0017DF25 /* fmo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467018BC5EAA0017DF25 /* fmo.cpp */; }; 4CE4469418BC5EAB0017DF25 /* get_intra_predictor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */; }; 4CE4469518BC5EAB0017DF25 /* manage_dec_ref.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */; }; - 4CE4469618BC5EAB0017DF25 /* mc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467318BC5EAA0017DF25 /* mc.cpp */; }; 4CE4469718BC5EAB0017DF25 /* mem_align.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467418BC5EAA0017DF25 /* mem_align.cpp */; }; 4CE4469818BC5EAB0017DF25 /* memmgr_nal_unit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */; }; 4CE4469918BC5EAB0017DF25 /* mv_pred.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */; }; @@ -73,7 +72,6 @@ 4CE4465318BC5EAA0017DF25 /* get_intra_predictor.h */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = get_intra_predictor.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 4CE4465418BC5EAA0017DF25 /* manage_dec_ref.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = manage_dec_ref.h; sourceTree = ""; }; 4CE4465518BC5EAA0017DF25 /* mb_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mb_cache.h; sourceTree = ""; }; - 4CE4465618BC5EAA0017DF25 /* mc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc.h; sourceTree = ""; }; 4CE4465718BC5EAA0017DF25 /* mem_align.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mem_align.h; sourceTree = ""; }; 4CE4465818BC5EAA0017DF25 /* memmgr_nal_unit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memmgr_nal_unit.h; sourceTree = ""; }; 4CE4465918BC5EAA0017DF25 /* mv_pred.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mv_pred.h; sourceTree = ""; }; @@ -99,7 +97,6 @@ 4CE4467018BC5EAA0017DF25 /* fmo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fmo.cpp; sourceTree = ""; }; 4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = get_intra_predictor.cpp; sourceTree = ""; }; 4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = manage_dec_ref.cpp; sourceTree = ""; }; - 4CE4467318BC5EAA0017DF25 /* mc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mc.cpp; sourceTree = ""; tabWidth = 1; usesTabs = 0; wrapsLines = 1; }; 4CE4467418BC5EAA0017DF25 /* mem_align.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mem_align.cpp; sourceTree = ""; }; 4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memmgr_nal_unit.cpp; sourceTree = ""; }; 4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mv_pred.cpp; sourceTree = ""; }; @@ -218,7 +215,6 @@ 4CE4465318BC5EAA0017DF25 /* get_intra_predictor.h */, 4CE4465418BC5EAA0017DF25 /* manage_dec_ref.h */, 4CE4465518BC5EAA0017DF25 /* mb_cache.h */, - 4CE4465618BC5EAA0017DF25 /* mc.h */, 4CE4465718BC5EAA0017DF25 /* mem_align.h */, 4CE4465818BC5EAA0017DF25 /* memmgr_nal_unit.h */, 4CE4465918BC5EAA0017DF25 /* mv_pred.h */, @@ -256,7 +252,6 @@ 4CE4467018BC5EAA0017DF25 /* fmo.cpp */, 4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */, 4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */, - 4CE4467318BC5EAA0017DF25 /* mc.cpp */, 4CE4467418BC5EAA0017DF25 /* mem_align.cpp */, 4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */, 4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */, @@ -375,7 +370,6 @@ 4CBC1B81194AC4E100214D9E /* intra_pred_aarch64_neon.S in Sources */, 4CE4469018BC5EAB0017DF25 /* decoder_core.cpp in Sources */, 4CE447AE18BC6BE90017DF25 /* intra_pred_neon.S in Sources */, - 4CE4469618BC5EAB0017DF25 /* mc.cpp in Sources */, 4CE4469C18BC5EAB0017DF25 /* rec_mb.cpp in Sources */, 4CE4468B18BC5EAB0017DF25 /* bit_stream.cpp in Sources */, 4CE4468D18BC5EAB0017DF25 /* decode_mb_aux.cpp in Sources */, diff --git a/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj b/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj index e3f4d3f9..8b624ea5 100644 --- a/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj +++ b/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj @@ -24,7 +24,6 @@ 4CE4471318BC605C0017DF25 /* encoder_data_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */; }; 4CE4471418BC605C0017DF25 /* encoder_ext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E318BC605C0017DF25 /* encoder_ext.cpp */; }; 4CE4471618BC605C0017DF25 /* get_intra_predictor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */; }; - 4CE4471718BC605C0017DF25 /* mc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E618BC605C0017DF25 /* mc.cpp */; }; 4CE4471818BC605C0017DF25 /* md.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E718BC605C0017DF25 /* md.cpp */; }; 4CE4471918BC605C0017DF25 /* memory_align.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E818BC605C0017DF25 /* memory_align.cpp */; }; 4CE4471A18BC605C0017DF25 /* mv_pred.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E918BC605C0017DF25 /* mv_pred.cpp */; }; @@ -93,7 +92,6 @@ 4CE446B518BC605C0017DF25 /* extern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = extern.h; sourceTree = ""; }; 4CE446B618BC605C0017DF25 /* get_intra_predictor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = get_intra_predictor.h; sourceTree = ""; }; 4CE446B718BC605C0017DF25 /* mb_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mb_cache.h; sourceTree = ""; }; - 4CE446B818BC605C0017DF25 /* mc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc.h; sourceTree = ""; }; 4CE446B918BC605C0017DF25 /* md.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = md.h; sourceTree = ""; }; 4CE446BA18BC605C0017DF25 /* memory_align.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memory_align.h; sourceTree = ""; }; 4CE446BB18BC605C0017DF25 /* mt_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mt_defs.h; sourceTree = ""; }; @@ -135,7 +133,6 @@ 4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = encoder_data_tables.cpp; sourceTree = ""; }; 4CE446E318BC605C0017DF25 /* encoder_ext.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = encoder_ext.cpp; sourceTree = ""; }; 4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = get_intra_predictor.cpp; sourceTree = ""; }; - 4CE446E618BC605C0017DF25 /* mc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mc.cpp; sourceTree = ""; }; 4CE446E718BC605C0017DF25 /* md.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = md.cpp; sourceTree = ""; }; 4CE446E818BC605C0017DF25 /* memory_align.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memory_align.cpp; sourceTree = ""; }; 4CE446E918BC605C0017DF25 /* mv_pred.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mv_pred.cpp; sourceTree = ""; }; @@ -276,7 +273,6 @@ 4CE446B518BC605C0017DF25 /* extern.h */, 4CE446B618BC605C0017DF25 /* get_intra_predictor.h */, 4CE446B718BC605C0017DF25 /* mb_cache.h */, - 4CE446B818BC605C0017DF25 /* mc.h */, 4CE446B918BC605C0017DF25 /* md.h */, 4CE446BA18BC605C0017DF25 /* memory_align.h */, 4CE446BB18BC605C0017DF25 /* mt_defs.h */, @@ -328,7 +324,6 @@ 4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */, 4CE446E318BC605C0017DF25 /* encoder_ext.cpp */, 4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */, - 4CE446E618BC605C0017DF25 /* mc.cpp */, 4CE446E718BC605C0017DF25 /* md.cpp */, 4CE446E818BC605C0017DF25 /* memory_align.cpp */, 4CE446E918BC605C0017DF25 /* mv_pred.cpp */, @@ -455,7 +450,6 @@ 4CE4470E18BC605C0017DF25 /* au_set.cpp in Sources */, F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */, 4CBC1B83194ACBB400214D9E /* intra_pred_aarch64_neon.S in Sources */, - 4CE4471718BC605C0017DF25 /* mc.cpp in Sources */, F7E9994519EBD1E9009B1021 /* svc_set_mb_syn_cabac.cpp in Sources */, F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */, 4CE4472918BC605C0017DF25 /* svc_set_mb_syn_cavlc.cpp in Sources */, diff --git a/codec/build/win32/dec/WelsDecCore.vcproj b/codec/build/win32/dec/WelsDecCore.vcproj index 64f63cd8..b70606a3 100644 --- a/codec/build/win32/dec/WelsDecCore.vcproj +++ b/codec/build/win32/dec/WelsDecCore.vcproj @@ -744,11 +744,7 @@ > - - - - > 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); +} + #endif //X86_ASM //***************************************************************************// // NEON implementation // //***************************************************************************// #if defined(HAVE_NEON) +void McHorVer20Width9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 17) + McHorVer20Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 9) + McHorVer20Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer02Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer02Height17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 8) + McHorVer02Height9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer22Width9Or17Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 17) + McHorVer22Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 9) + McHorVer22Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} void McCopy_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { if (16 == iWidth) @@ -941,8 +988,38 @@ void McChroma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int3 McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight); } } +void PixelAvg_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { + static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { + PixStrideAvgWidthEq8_neon, + PixStrideAvgWidthEq16_neon + }; + kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); +} #endif #if defined(HAVE_NEON_AARCH64) +void McHorVer20Width9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 17) + McHorVer20Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 9) + McHorVer20Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer02Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 16) + McHorVer02Height17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 8) + McHorVer02Height9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} +void McHorVer22Width9Or17Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, + int32_t iDstStride, + int32_t iWidth, int32_t iHeight) { + if (iWidth == 17) + McHorVer22Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); + else //if (iWidth == 9) + McHorVer22Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} void McCopy_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { if (16 == iWidth) @@ -1194,33 +1271,58 @@ void McChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pD McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight); } } +void PixelAvg_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, + const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { + static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { + PixStrideAvgWidthEq8_AArch64_neon, + PixStrideAvgWidthEq16_AArch64_neon + }; + kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); +} #endif -void InitMcFunc (SMcFunc* pMcFunc, int32_t iCpu) { - pMcFunc->pMcLumaFunc = McLuma_c; - pMcFunc->pMcChromaFunc = McChroma_c; +void InitMcFunc (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) { + pMcFuncs->pfLumaHalfpelHor = McHorVer20_c; + pMcFuncs->pfLumaHalfpelVer = McHorVer02_c; + pMcFuncs->pfLumaHalfpelCen = McHorVer22_c; + pMcFuncs->pfSampleAveraging = PixelAvg_c; + pMcFuncs->pMcChromaFunc = McChroma_c; + pMcFuncs->pMcLumaFunc = McLuma_c; -#ifdef HAVE_NEON - if (iCpu & WELS_CPU_NEON) { - pMcFunc->pMcLumaFunc = McLuma_neon; - pMcFunc->pMcChromaFunc = McChroma_neon; - } -#endif -#ifdef HAVE_NEON_AARCH64 - if (iCpu & WELS_CPU_NEON) { - pMcFunc->pMcLumaFunc = McLuma_AArch64_neon; - pMcFunc->pMcChromaFunc = McChroma_AArch64_neon; - } -#endif #if defined (X86_ASM) - if (iCpu & WELS_CPU_SSE2) { - pMcFunc->pMcLumaFunc = McLuma_sse2; - pMcFunc->pMcChromaFunc = McChroma_sse2; + if (uiCpuFlag & WELS_CPU_SSE2) { + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2; + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2; + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2; + pMcFuncs->pfSampleAveraging = PixelAvg_sse2; + pMcFuncs->pMcChromaFunc = McChroma_sse2; + pMcFuncs->pMcLumaFunc = McLuma_sse2; } - if (iCpu & WELS_CPU_SSSE3) { - pMcFunc->pMcChromaFunc = McChroma_ssse3; + + if (uiCpuFlag & WELS_CPU_SSSE3) { + pMcFuncs->pMcChromaFunc = McChroma_ssse3; } #endif //(X86_ASM) -} -} // namespace WelsDec +#if defined(HAVE_NEON) + if (uiCpuFlag & WELS_CPU_NEON) { + pMcFuncs->pMcLumaFunc = McLuma_neon; + pMcFuncs->pMcChromaFunc = McChroma_neon; + pMcFuncs->pfSampleAveraging = PixelAvg_neon; + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16 + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16 + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1 + } +#endif +#if defined(HAVE_NEON_AARCH64) + if (uiCpuFlag & WELS_CPU_NEON) { + pMcFuncs->pMcLumaFunc = McLuma_AArch64_neon; + pMcFuncs->pMcChromaFunc = McChroma_AArch64_neon; + pMcFuncs->pfSampleAveraging = PixelAvg_AArch64_neon; + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16 + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16 + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1 + } +#endif +} +} // namespace WelsCommon diff --git a/codec/common/targets.mk b/codec/common/targets.mk index 507455ab..da17491b 100644 --- a/codec/common/targets.mk +++ b/codec/common/targets.mk @@ -7,6 +7,7 @@ COMMON_CPP_SRCS=\ $(COMMON_SRCDIR)/src/deblocking_common.cpp\ $(COMMON_SRCDIR)/src/expand_pic.cpp\ $(COMMON_SRCDIR)/src/intra_pred_common.cpp\ + $(COMMON_SRCDIR)/src/mc.cpp\ $(COMMON_SRCDIR)/src/sad_common.cpp\ $(COMMON_SRCDIR)/src/utils.cpp\ $(COMMON_SRCDIR)/src/welsCodecTrace.cpp\ diff --git a/codec/decoder/core/inc/decoder_context.h b/codec/decoder/core/inc/decoder_context.h index 00c11b9b..30e16b1f 100644 --- a/codec/decoder/core/inc/decoder_context.h +++ b/codec/decoder/core/inc/decoder_context.h @@ -55,6 +55,7 @@ #include "crt_util_safe_x.h" #include "mb_cache.h" #include "expand_pic.h" +#include "mc.h" namespace WelsDec { #define MAX_PRED_MODE_ID_I16x16 3 @@ -142,13 +143,6 @@ uint8_t uiLongRefCount[LIST_A]; // dependend on ref pic module int32_t iMaxLongTermFrameIdx; } SRefPic, *PRefPic; -typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight); -typedef struct TagMcFunc { -PWelsMcFunc pMcLumaFunc; -PWelsMcFunc pMcChromaFunc; -} SMcFunc; - typedef void (*PCopyFunc) (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); typedef struct TagCopyFunc { PCopyFunc pCopyLumaFunc; diff --git a/codec/decoder/core/inc/mc.h b/codec/decoder/core/inc/mc.h deleted file mode 100644 index 4ae2243a..00000000 --- a/codec/decoder/core/inc/mc.h +++ /dev/null @@ -1,50 +0,0 @@ -/*! - * \copy - * Copyright (c) 2013, Cisco Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef WELS_MC_H__ -#define WELS_MC_H__ - -#include "wels_const.h" -#include "macros.h" -#include "decoder_context.h" -#include "mc_common.h" - -namespace WelsDec { - -typedef void (*PMcChromaWidthExtFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - const uint8_t* kpABCD, int32_t iHeight); - -void InitMcFunc (SMcFunc* pMcFunc, int32_t iCpu); - -} // namespace WelsDec - -#endif//WELS_MC_H__ diff --git a/codec/decoder/targets.mk b/codec/decoder/targets.mk index 05c44725..a9ee1042 100644 --- a/codec/decoder/targets.mk +++ b/codec/decoder/targets.mk @@ -13,7 +13,6 @@ DECODER_CPP_SRCS=\ $(DECODER_SRCDIR)/core/src/fmo.cpp\ $(DECODER_SRCDIR)/core/src/get_intra_predictor.cpp\ $(DECODER_SRCDIR)/core/src/manage_dec_ref.cpp\ - $(DECODER_SRCDIR)/core/src/mc.cpp\ $(DECODER_SRCDIR)/core/src/mem_align.cpp\ $(DECODER_SRCDIR)/core/src/memmgr_nal_unit.cpp\ $(DECODER_SRCDIR)/core/src/mv_pred.cpp\ diff --git a/codec/encoder/core/inc/mc.h b/codec/encoder/core/inc/mc.h deleted file mode 100644 index c8aa68ab..00000000 --- a/codec/encoder/core/inc/mc.h +++ /dev/null @@ -1,51 +0,0 @@ -/*! - * \copy - * Copyright (c) 2013, Cisco Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - */ - -//macroblock.h -#ifndef WELS_MC_H__ -#define WELS_MC_H__ - -#include -#include "typedefs.h" -#include "wels_const.h" -#include "macros.h" -#include "wels_func_ptr_def.h" -#include "mc_common.h" - -/////////////////////luma MC////////////////////////// -//x y means dx(mv[0] & 3) and dy(mv[1] & 3) - -namespace WelsEnc { -void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag); - -} -#endif//WELS_MC_H__ diff --git a/codec/encoder/core/inc/wels_func_ptr_def.h b/codec/encoder/core/inc/wels_func_ptr_def.h index b4870fb4..a5917927 100644 --- a/codec/encoder/core/inc/wels_func_ptr_def.h +++ b/codec/encoder/core/inc/wels_func_ptr_def.h @@ -44,6 +44,7 @@ #include "expand_pic.h" #include "rc.h" #include "IWelsVP.h" +#include "mc.h" namespace WelsEnc { @@ -74,25 +75,6 @@ typedef int32_t (*PQuantizationSkipFunc) (int16_t* pDct, int16_t iFF, int16_t i typedef int32_t (*PQuantizationHadamardFunc) (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct, int16_t* pBlock); -typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight); - -typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iWidth, int32_t iHeight); -typedef void (*PWelsLumaQuarpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight); -typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t, int32_t); - -typedef struct TagMcFunc { - PWelsLumaHalfpelMcFunc pfLumaHalfpelHor; - PWelsLumaHalfpelMcFunc pfLumaHalfpelVer; - PWelsLumaHalfpelMcFunc pfLumaHalfpelCen; - PWelsMcFunc pMcChromaFunc; - - PWelsMcFunc pMcLumaFunc; - PWelsSampleAveragingFunc pfSampleAveraging; -} SMcFunc; - typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc); typedef void (*PLumaDeblockingEQ4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta); typedef void (*PChromaDeblockingLT4Func) (uint8_t* iSampleCb, uint8_t* iSampleCr, int32_t iStride, int32_t iAlpha, diff --git a/codec/encoder/core/src/encoder.cpp b/codec/encoder/core/src/encoder.cpp index db71bb82..719f030a 100644 --- a/codec/encoder/core/src/encoder.cpp +++ b/codec/encoder/core/src/encoder.cpp @@ -209,7 +209,7 @@ int32_t InitFunctionPointers (sWelsEncCtx* pEncCtx, SWelsSvcCodingParam* pParam, /* Motion compensation */ /*init pixel average function*/ /*get one column or row pixel when refinement*/ - WelsInitMcFuncs (&pFuncList->sMcFuncs, uiCpuFlag); + InitMcFunc (&pFuncList->sMcFuncs, uiCpuFlag); InitCoeffFunc (pFuncList,uiCpuFlag,pParam->iEntropyCodingModeFlag); WelsInitEncodingFuncs (pFuncList, uiCpuFlag); diff --git a/codec/encoder/core/src/mc.cpp b/codec/encoder/core/src/mc.cpp deleted file mode 100644 index f542863b..00000000 --- a/codec/encoder/core/src/mc.cpp +++ /dev/null @@ -1,891 +0,0 @@ -/*! - * \copy - * Copyright (c) 2009-2013, Cisco Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * - * \file mc.c - * - * \brief Interfaces implementation for motion compensation - * - * \date 03/17/2009 Created - * - ************************************************************************************* - */ - -#include "mc.h" -#include "cpu_core.h" - -typedef void (*PWelsSampleWidthAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, - int32_t, int32_t); - -namespace WelsEnc { -/*------------------weight for chroma fraction pixel interpolation------------------*/ -//kuiA = (8 - dx) * (8 - dy); -//kuiB = dx * (8 - dy); -//kuiC = (8 - dx) * dy; -//kuiD = dx * dy -static const uint8_t g_kuiABCD[8][8][4] = { ////g_kuiA[dy][dx], g_kuiB[dy][dx], g_kuiC[dy][dx], g_kuiD[dy][dx] - { - {64, 0, 0, 0}, {56, 8, 0, 0}, {48, 16, 0, 0}, {40, 24, 0, 0}, - {32, 32, 0, 0}, {24, 40, 0, 0}, {16, 48, 0, 0}, {8, 56, 0, 0} - }, - { - {56, 0, 8, 0}, {49, 7, 7, 1}, {42, 14, 6, 2}, {35, 21, 5, 3}, - {28, 28, 4, 4}, {21, 35, 3, 5}, {14, 42, 2, 6}, {7, 49, 1, 7} - }, - { - {48, 0, 16, 0}, {42, 6, 14, 2}, {36, 12, 12, 4}, {30, 18, 10, 6}, - {24, 24, 8, 8}, {18, 30, 6, 10}, {12, 36, 4, 12}, {6, 42, 2, 14} - }, - { - {40, 0, 24, 0}, {35, 5, 21, 3}, {30, 10, 18, 6}, {25, 15, 15, 9}, - {20, 20, 12, 12}, {15, 25, 9, 15}, {10, 30, 6, 18}, {5, 35, 3, 21} - }, - { - {32, 0, 32, 0}, {28, 4, 28, 4}, {24, 8, 24, 8}, {20, 12, 20, 12}, - {16, 16, 16, 16}, {12, 20, 12, 20}, {8, 24, 8, 24}, {4, 28, 4, 28} - }, - { - {24, 0, 40, 0}, {21, 3, 35, 5}, {18, 6, 30, 10}, {15, 9, 25, 15}, - {12, 12, 20, 20}, {9, 15, 15, 25}, {6, 18, 10, 30}, {3, 21, 5, 35} - }, - { - {16, 0, 48, 0}, {14, 2, 42, 6}, {12, 4, 36, 12}, {10, 6, 30, 18}, - {8, 8, 24, 24}, {6, 10, 18, 30}, {4, 12, 12, 36}, {2, 14, 6, 42} - }, - { - {8, 0, 56, 0}, {7, 1, 49, 7}, {6, 2, 42, 14}, {5, 3, 35, 21}, - {4, 4, 28, 28}, {3, 5, 21, 35}, {2, 6, 14, 42}, {1, 7, 7, 49} - } -}; - -//***************************************************************************// -// C code implementation // -//***************************************************************************// -static inline void McCopyWidthEq4_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - int32_t i; - for (i = 0; i < iHeight; i++) { - memcpy (pDst, pSrc, 4); // confirmed_safe_unsafe_usage - pDst += iDstStride; - pSrc += iSrcStride; - } -} - -static inline void McCopyWidthEq8_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) - -{ - int32_t i; - for (i = 0; i < iHeight; i++) { - memcpy (pDst, pSrc, 8); // confirmed_safe_unsafe_usage - pDst += iDstStride; - pSrc += iSrcStride; - } -} -static inline void McCopyWidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - int32_t i; - for (i = 0; i < iHeight; i++) { - memcpy (pDst, pSrc, 16); // confirmed_safe_unsafe_usage - pDst += iDstStride; - pSrc += iSrcStride; - } -} - -//--------------------Luma sample MC------------------// -static inline int32_t HorFilter_c (const uint8_t* pSrc) { - int32_t iPix05 = pSrc[-2] + pSrc[3]; - int32_t iPix14 = pSrc[-1] + pSrc[2]; - int32_t iPix23 = pSrc[ 0] + pSrc[1]; - - return (iPix05 - ((iPix14 << 2) + iPix14) + (iPix23 << 4) + (iPix23 << 2)); -} - -static inline int32_t HorFilterInput16bit1_c (const int16_t* pSrc) { - int32_t iPix05 = pSrc[0] + pSrc[5]; - int32_t iPix14 = pSrc[1] + pSrc[4]; - int32_t iPix23 = pSrc[2] + pSrc[3]; - - return (iPix05 - ((iPix14 << 2) + iPix14) + (iPix23 << 4) + (iPix23 << 2)); -} -static inline int32_t VerFilter_c (const uint8_t* pSrc, const int32_t kiSrcStride) { - const int32_t kiLine1 = kiSrcStride; - const int32_t kiLine2 = (kiSrcStride << 1); - const int32_t kiLine3 = kiLine1 + kiLine2; - const uint32_t kuiPix05 = * (pSrc - kiLine2) + * (pSrc + kiLine3); - const uint32_t kuiPix14 = * (pSrc - kiLine1) + * (pSrc + kiLine2); - const uint32_t kuiPix23 = * (pSrc) + * (pSrc + kiLine1); - - return (kuiPix05 - ((kuiPix14 << 2) + kuiPix14) + (kuiPix23 << 4) + (kuiPix23 << 2)); -} - -static inline void PixelAvgWidthEq8_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, - const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight) { - int32_t i, j; - for (i = 0; i < iHeight; i++) { - for (j = 0; j < 8; j++) { - pDst[j] = (pSrcA[j] + pSrcB[j] + 1) >> 1; - } - pDst += iDstStride; - pSrcA += iSrcAStride; - pSrcB += iSrcBStride; - } -} -static inline void PixelAvgWidthEq16_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, - const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight) { - int32_t i, j; - for (i = 0; i < iHeight; i++) { - for (j = 0; j < 16; j++) { - pDst[j] = (pSrcA[j] + pSrcB[j] + 1) >> 1; - } - pDst += iDstStride; - pSrcA += iSrcAStride; - pSrcB += iSrcBStride; - } -} - -//horizontal filter to gain half sample, that is (2, 0) location in quarter sample -static inline void McHorVer20WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - int32_t i, j; - for (i = 0; i < iHeight; i++) { - for (j = 0; j < 16; j++) { - pDst[j] = WelsClip1 ((HorFilter_c (pSrc + j) + 16) >> 5); - } - pDst += iDstStride; - pSrc += iSrcStride; - } -} -//vertical filter to gain half sample, that is (0, 2) location in quarter sample -static inline void McHorVer02WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - int32_t i, j; - for (i = 0; i < iHeight; i++) { - for (j = 0; j < 16; j++) { - pDst[j] = WelsClip1 ((VerFilter_c (pSrc + j, iSrcStride) + 16) >> 5); - } - pDst += iDstStride; - pSrc += iSrcStride; - } -} -//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample -static inline void McHorVer22WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - int16_t pTmp[16 + 5] = {0}; //16 - int32_t i, j, k; - - for (i = 0; i < iHeight; i++) { - for (j = 0; j < 16 + 5; j++) { - pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride); - } - for (k = 0; k < 16; k++) { - pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10); - } - pSrc += iSrcStride; - pDst += iDstStride; - } -} - -/////////////////////luma MC////////////////////////// - -static inline void McHorVer01WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) - - McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); -} -static inline void McHorVer03WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) - - McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); -} -static inline void McHorVer10WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) - - McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); -} -static inline void McHorVer11WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer12WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer13WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer21WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer23WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer30WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) - - McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight); -} -static inline void McHorVer31WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer32WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer33WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} - -static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iWidth, - int32_t iHeight) { - int32_t i, j; - for (i = 0; i < iHeight; i++) { - for (j = 0; j < iWidth; j++) { - pDst[j] = WelsClip1 ((HorFilter_c (pSrc + j) + 16) >> 5); - } - pDst += iDstStride; - pSrc += iSrcStride; - } -} -//vertical filter to gain half sample, that is (0, 2) location in quarter sample -static inline void McHorVer02_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iWidth, - int32_t iHeight) { - int32_t i, j; - for (i = 0; i < iHeight; i++) { - for (j = 0; j < iWidth; j++) { - pDst[j] = WelsClip1 ((VerFilter_c (pSrc + j, iSrcStride) + 16) >> 5); - } - pDst += iDstStride; - pSrc += iSrcStride; - } -} -//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample -static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iWidth, - int32_t iHeight) { - int16_t pTmp[17 + 5] = {0}; //w+1 - int32_t i, j, k; - - for (i = 0; i < iHeight; i++) { - for (j = 0; j < iWidth + 5; j++) { - pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride); - } - for (k = 0; k < iWidth; k++) { - pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10); - } - pSrc += iSrcStride; - pDst += iDstStride; - } -} -static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, - int32_t iHeight) { - int32_t i; - if (iWidth == 16) - McCopyWidthEq16_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else if (iWidth == 8) - McCopyWidthEq8_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else if (iWidth == 4) - McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else { - for (i = 0; i < iHeight; i++) { - memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage - pDst += iDstStride; - pSrc += iSrcStride; - } - } -} - -void McChroma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) -//pSrc has been added the offset of mv -{ - const int32_t kiDx = iMvX & 0x07; - const int32_t kiDy = iMvY & 0x07; - - if (0 == kiDx && 0 == kiDy) { - McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); - } else { - const int32_t kiDA = g_kuiABCD[kiDy][kiDx][0]; - const int32_t kiDB = g_kuiABCD[kiDy][kiDx][1]; - const int32_t kiDC = g_kuiABCD[kiDy][kiDx][2]; - const int32_t kiDD = g_kuiABCD[kiDy][kiDx][3]; - - int32_t i, j; - - const uint8_t* pSrcNext = pSrc + iSrcStride; - - for (i = 0; i < iHeight; i++) { - for (j = 0; j < iWidth; j++) { - pDst[j] = (kiDA * pSrc[j] + kiDB * pSrc[j + 1] + kiDC * pSrcNext[j] + kiDD * pSrcNext[j + 1] + 32) >> 6; - } - pDst += iDstStride; - pSrc = pSrcNext; - pSrcNext += iSrcStride; - } - } -} -void McLuma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { - static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x] - McCopyWidthEq16_c, McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c, - McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c, - McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c, - McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c - }; - uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03); - pWelsMcFuncWidthEq16[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight); -} -void PixelAvg_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, - const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { - static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { - PixelAvgWidthEq8_c, - PixelAvgWidthEq16_c - }; - kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); -} -//***************************************************************************// -// MMXEXT and SSE2 implementation // -//***************************************************************************// -#if defined(X86_ASM) - -static inline void McHorVer22WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 21, 8, 16) - McHorVer22Width8HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 16, iHeight + 5); - McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)pTap, 16, pDst, iDstStride, 8, iHeight); -} - -//2010.2.5 - -static inline void McHorVer02WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* PDst, int32_t iDstStride, - int32_t iHeight) { - McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, PDst, iDstStride, iHeight); - McHorVer02WidthEq8_sse2 (&pSrc[8], iSrcStride, &PDst[8], iDstStride, iHeight); -} -static inline void McHorVer22WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight); - McHorVer22WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight); -} -void McHorVer22Width9Or17Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iWidth, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 22, 24, 16) - int32_t tmp1 = 2 * (iWidth - 8); - McHorVer22HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 48, iWidth, iHeight + 5); - McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)pTap, 48, pDst, iDstStride, iWidth - 1, iHeight); - McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight); -} - -static inline void McHorVer01WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) - - McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); -} -static inline void McHorVer03WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) - - McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); -} -static inline void McHorVer10WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) - - McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); -} -static inline void McHorVer11WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer12WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer13WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer21WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer23WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer30WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) - - McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight); -} -static inline void McHorVer31WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer32WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -static inline void McHorVer33WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - - McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} - -static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iWidth, int32_t iHeight) { - int32_t i; - if (iWidth == 16) - McCopyWidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else if (iWidth == 8) - McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else if (iWidth == 4) - McCopyWidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else { - for (i = 0; i < iHeight; i++) { - memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage - pDst += iDstStride; - pSrc += iSrcStride; - } - } -} - -typedef void (*McChromaWidthEqx) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - const uint8_t* pABCD, int32_t iHeigh); -void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = iMvX & 0x07; - const int32_t kiD8y = iMvY & 0x07; - static const McChromaWidthEqx kpfFuncs[2] = { - McChromaWidthEq4_mmx, - McChromaWidthEq8_sse2 - }; - - if (0 == kiD8x && 0 == kiD8y) { - McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); - } else { - kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight); - } -} - -void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = iMvX & 0x07; - const int32_t kiD8y = iMvY & 0x07; - - static const McChromaWidthEqx kpfFuncs[2] = { - McChromaWidthEq4_mmx, - McChromaWidthEq8_ssse3 - }; - if (0 == kiD8x && 0 == kiD8y) { - McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); - } else { - kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight); - } - -} - -void McLuma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { - static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = { - McCopyWidthEq16_sse2, McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2, - McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2, - McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2, - McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2 - }; - uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03); - pWelsMcFuncWidthEq16_sse2[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight); -} -void PixelAvg_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, - const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { - static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { - PixelAvgWidthEq8_mmx, - PixelAvgWidthEq16_sse2 - }; - kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); -} -#endif //X86_ASM - -//***************************************************************************// -// NEON implementation // -//***************************************************************************// -#if defined(HAVE_NEON) -void McHorVer20Width9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iWidth, int32_t iHeight) { - if (iWidth == 17) - McHorVer20Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else //if (iWidth == 9) - McHorVer20Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); -} -void McHorVer02Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iWidth, int32_t iHeight) { - if (iWidth == 16) - McHorVer02Height17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else //if (iWidth == 8) - McHorVer02Height9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); -} -void McHorVer22Width9Or17Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iWidth, int32_t iHeight) { - if (iWidth == 17) - McHorVer22Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else //if (iWidth == 9) - McHorVer22Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); -} -void EncMcHorVer11_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight); -} -void EncMcHorVer12_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer02WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight); -} -void EncMcHorVer13_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight); -} -void EncMcHorVer21_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight); -} -void EncMcHorVer23_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight); -} -void EncMcHorVer31_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight); -} -void EncMcHorVer32_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight); -} -void EncMcHorVer33_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight); -} -void EncMcChroma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = iMvX & 0x07; - const int32_t kiD8y = iMvY & 0x07; - if (0 == kiD8x && 0 == kiD8y) { - if (8 == iWidth) - McCopyWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else // iWidth == 4 - McCopyWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); - } else { - if (8 == iWidth) - McChromaWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight); - else //if(4 == iWidth) - McChromaWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight); - } -} -void EncMcLuma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { - static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { //[x][y] - McCopyWidthEq16_neon, McHorVer10WidthEq16_neon, McHorVer20WidthEq16_neon, McHorVer30WidthEq16_neon, - McHorVer01WidthEq16_neon, EncMcHorVer11_neon, EncMcHorVer21_neon, EncMcHorVer31_neon, - McHorVer02WidthEq16_neon, EncMcHorVer12_neon, McHorVer22WidthEq16_neon, EncMcHorVer32_neon, - McHorVer03WidthEq16_neon, EncMcHorVer13_neon, EncMcHorVer23_neon, EncMcHorVer33_neon - }; - uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03); - pWelsMcFuncWidthEq16_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight); -} -void PixelAvg_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, - const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { - static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { - PixStrideAvgWidthEq8_neon, - PixStrideAvgWidthEq16_neon - }; - kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); -} -#endif - -#if defined(HAVE_NEON_AARCH64) -void McHorVer20Width9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iWidth, int32_t iHeight) { - if (iWidth == 17) - McHorVer20Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else //if (iWidth == 9) - McHorVer20Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); -} -void McHorVer02Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iWidth, int32_t iHeight) { - if (iWidth == 16) - McHorVer02Height17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else //if (iWidth == 8) - McHorVer02Height9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); -} -void McHorVer22Width9Or17Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, - int32_t iDstStride, - int32_t iWidth, int32_t iHeight) { - if (iWidth == 17) - McHorVer22Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else //if (iWidth == 9) - McHorVer22Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); -} -void EncMcHorVer11_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -void EncMcHorVer12_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -void EncMcHorVer13_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -void EncMcHorVer21_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -void EncMcHorVer23_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -void EncMcHorVer31_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -void EncMcHorVer32_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pTmp, 16, iHeight); - McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -void EncMcHorVer33_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int32_t iHeight) { - ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) - McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); - McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight); - PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); -} -void EncMcChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = iMvX & 0x07; - const int32_t kiD8y = iMvY & 0x07; - if (0 == kiD8x && 0 == kiD8y) { - if (8 == iWidth) - McCopyWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); - else // iWidth == 4 - McCopyWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); - } else { - if (8 == iWidth) - McChromaWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight); - else //if(4 == iWidth) - McChromaWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight); - } -} -void EncMcLuma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { - static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_AArch64_neon[16] = { //[x][y] - McCopyWidthEq16_AArch64_neon, McHorVer10WidthEq16_AArch64_neon, McHorVer20WidthEq16_AArch64_neon, McHorVer30WidthEq16_AArch64_neon, - McHorVer01WidthEq16_AArch64_neon, EncMcHorVer11_AArch64_neon, EncMcHorVer21_AArch64_neon, EncMcHorVer31_AArch64_neon, - McHorVer02WidthEq16_AArch64_neon, EncMcHorVer12_AArch64_neon, McHorVer22WidthEq16_AArch64_neon, EncMcHorVer32_AArch64_neon, - McHorVer03WidthEq16_AArch64_neon, EncMcHorVer13_AArch64_neon, EncMcHorVer23_AArch64_neon, EncMcHorVer33_AArch64_neon - }; - uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03); - pWelsMcFuncWidthEq16_AArch64_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight); -} -void PixelAvg_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, - const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) { - static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { - PixStrideAvgWidthEq8_AArch64_neon, - PixStrideAvgWidthEq16_AArch64_neon - }; - kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); -} -#endif - -void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) { - pMcFuncs->pfLumaHalfpelHor = McHorVer20_c; - pMcFuncs->pfLumaHalfpelVer = McHorVer02_c; - pMcFuncs->pfLumaHalfpelCen = McHorVer22_c; - pMcFuncs->pfSampleAveraging = PixelAvg_c; - pMcFuncs->pMcChromaFunc = McChroma_c; - pMcFuncs->pMcLumaFunc = McLuma_c; -#if defined (X86_ASM) - if (uiCpuFlag & WELS_CPU_SSE2) { - pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2; - pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2; - pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2; - pMcFuncs->pfSampleAveraging = PixelAvg_sse2; - pMcFuncs->pMcChromaFunc = McChroma_sse2; - pMcFuncs->pMcLumaFunc = McLuma_sse2; - } - - if (uiCpuFlag & WELS_CPU_SSSE3) { - pMcFuncs->pMcChromaFunc = McChroma_ssse3; - } - -#endif //(X86_ASM) - -#if defined(HAVE_NEON) - if (uiCpuFlag & WELS_CPU_NEON) { - pMcFuncs->pMcLumaFunc = EncMcLuma_neon; - pMcFuncs->pMcChromaFunc = EncMcChroma_neon; - pMcFuncs->pfSampleAveraging = PixelAvg_neon; - pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16 - pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16 - pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1 - } -#endif -#if defined(HAVE_NEON_AARCH64) - if (uiCpuFlag & WELS_CPU_NEON) { - pMcFuncs->pMcLumaFunc = EncMcLuma_AArch64_neon; - pMcFuncs->pMcChromaFunc = EncMcChroma_AArch64_neon; - pMcFuncs->pfSampleAveraging = PixelAvg_AArch64_neon; - pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16 - pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16 - pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1 - } -#endif -} -} diff --git a/codec/encoder/targets.mk b/codec/encoder/targets.mk index d51a27b5..ec5dd88b 100644 --- a/codec/encoder/targets.mk +++ b/codec/encoder/targets.mk @@ -8,7 +8,6 @@ ENCODER_CPP_SRCS=\ $(ENCODER_SRCDIR)/core/src/encoder_data_tables.cpp\ $(ENCODER_SRCDIR)/core/src/encoder_ext.cpp\ $(ENCODER_SRCDIR)/core/src/get_intra_predictor.cpp\ - $(ENCODER_SRCDIR)/core/src/mc.cpp\ $(ENCODER_SRCDIR)/core/src/md.cpp\ $(ENCODER_SRCDIR)/core/src/memory_align.cpp\ $(ENCODER_SRCDIR)/core/src/mv_pred.cpp\ diff --git a/test/decoder/DecUT_MotionCompensation.cpp b/test/decoder/DecUT_MotionCompensation.cpp index 68bb871b..99ca25af 100644 --- a/test/decoder/DecUT_MotionCompensation.cpp +++ b/test/decoder/DecUT_MotionCompensation.cpp @@ -1,8 +1,9 @@ #include #include "codec_def.h" +#include "macros.h" #include "mc.h" #include "cpu.h" -using namespace WelsDec; +using namespace WelsCommon; #include "mc_test_common.h" diff --git a/test/encoder/EncUT_MotionCompensation.cpp b/test/encoder/EncUT_MotionCompensation.cpp index 6ed9ed22..4454ebb4 100644 --- a/test/encoder/EncUT_MotionCompensation.cpp +++ b/test/encoder/EncUT_MotionCompensation.cpp @@ -1,10 +1,9 @@ #include #include "codec_def.h" +#include "macros.h" #include "mc.h" #include "cpu.h" -using namespace WelsEnc; - -#define InitMcFunc WelsInitMcFuncs +using namespace WelsCommon; #include "mc_test_common.h" @@ -27,7 +26,7 @@ TEST (EncMcAvg, PixelAvg) { int32_t width = 8 << w; int32_t height = 16; uint32_t uiCpuFlag = k == 0 ? 0 : WelsCPUFeatureDetect (NULL); - WelsInitMcFuncs (&sMcFunc, uiCpuFlag); + InitMcFunc (&sMcFunc, uiCpuFlag); uint8_t uSrc1[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; uint8_t uSrc2[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; ENFORCE_STACK_ALIGN_2D (uint8_t, uDstAnchor, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); @@ -76,7 +75,7 @@ TEST (EncMcHalfpel, LumaHalfpel) { } uint32_t uiCpuFlag = k == 0 ? 0 : WelsCPUFeatureDetect (NULL); - WelsInitMcFuncs (&sMcFunc, uiCpuFlag); + InitMcFunc (&sMcFunc, uiCpuFlag); MCHalfPelFilterAnchor (uAnchors[1], uAnchors[2], uAnchors[3], uAnchors[0], MC_BUFF_SRC_STRIDE, width, height, pBuf + 4); sMcFunc.pfLumaHalfpelHor (&uSrcTest[4][4], MC_BUFF_SRC_STRIDE, uDstTest[0], MC_BUFF_DST_STRIDE, width + 1, height);