Move the MC routines to the common library
Use the decoder versions of the functions (which are capable of handling widths 4/8/16 for luma, not only 16 as in the encoder). Because the more generic versions need to check the width on every call, there may be a small performance loss. Measurements show that the actual difference is very small (and the shared routines turn out to be faster than the existing ones in ARM NEON setups).
This commit is contained in:
@@ -19,6 +19,7 @@
|
|||||||
4CE443D918B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; };
|
4CE443D918B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; };
|
||||||
53C1C9BC193F0FB000404D8F /* expand_pic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 53C1C9BB193F0FB000404D8F /* expand_pic.cpp */; };
|
53C1C9BC193F0FB000404D8F /* expand_pic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 53C1C9BB193F0FB000404D8F /* expand_pic.cpp */; };
|
||||||
5BA8F2C019603F5F00011CE4 /* common_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5BA8F2BF19603F5F00011CE4 /* common_tables.cpp */; };
|
5BA8F2C019603F5F00011CE4 /* common_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5BA8F2BF19603F5F00011CE4 /* common_tables.cpp */; };
|
||||||
|
5BDD15ED1A79027600B6CA2E /* mc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5BDD15EC1A79027600B6CA2E /* mc.cpp */; };
|
||||||
F0B204F918FD23BF005DA23F /* copy_mb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F0B204F818FD23BF005DA23F /* copy_mb.cpp */; };
|
F0B204F918FD23BF005DA23F /* copy_mb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F0B204F818FD23BF005DA23F /* copy_mb.cpp */; };
|
||||||
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8221906673900E156A8 /* arm_arch64_common_macro.S */; };
|
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8221906673900E156A8 /* arm_arch64_common_macro.S */; };
|
||||||
F556A8251906673900E156A8 /* expand_picture_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */; };
|
F556A8251906673900E156A8 /* expand_picture_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */; };
|
||||||
@@ -54,7 +55,6 @@
|
|||||||
4C3406BA18D96EA600DFA14A /* deblocking_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deblocking_common.h; sourceTree = "<group>"; };
|
4C3406BA18D96EA600DFA14A /* deblocking_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deblocking_common.h; sourceTree = "<group>"; };
|
||||||
4C3406BD18D96EA600DFA14A /* ls_defines.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ls_defines.h; sourceTree = "<group>"; };
|
4C3406BD18D96EA600DFA14A /* ls_defines.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ls_defines.h; sourceTree = "<group>"; };
|
||||||
4C3406BE18D96EA600DFA14A /* macros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = macros.h; sourceTree = "<group>"; };
|
4C3406BE18D96EA600DFA14A /* macros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = macros.h; sourceTree = "<group>"; };
|
||||||
4C3406BF18D96EA600DFA14A /* mc_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc_common.h; sourceTree = "<group>"; };
|
|
||||||
4C3406C018D96EA600DFA14A /* measure_time.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = measure_time.h; sourceTree = "<group>"; };
|
4C3406C018D96EA600DFA14A /* measure_time.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = measure_time.h; sourceTree = "<group>"; };
|
||||||
4C3406C118D96EA600DFA14A /* typedefs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = typedefs.h; sourceTree = "<group>"; };
|
4C3406C118D96EA600DFA14A /* typedefs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = typedefs.h; sourceTree = "<group>"; };
|
||||||
4C3406C218D96EA600DFA14A /* WelsThreadLib.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WelsThreadLib.h; sourceTree = "<group>"; };
|
4C3406C218D96EA600DFA14A /* WelsThreadLib.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WelsThreadLib.h; sourceTree = "<group>"; };
|
||||||
@@ -70,6 +70,8 @@
|
|||||||
53C1C9BB193F0FB000404D8F /* expand_pic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = expand_pic.cpp; sourceTree = "<group>"; };
|
53C1C9BB193F0FB000404D8F /* expand_pic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = expand_pic.cpp; sourceTree = "<group>"; };
|
||||||
5BA8F2BE19603F3500011CE4 /* wels_common_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = wels_common_defs.h; sourceTree = "<group>"; };
|
5BA8F2BE19603F3500011CE4 /* wels_common_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = wels_common_defs.h; sourceTree = "<group>"; };
|
||||||
5BA8F2BF19603F5F00011CE4 /* common_tables.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = common_tables.cpp; sourceTree = "<group>"; };
|
5BA8F2BF19603F5F00011CE4 /* common_tables.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = common_tables.cpp; sourceTree = "<group>"; };
|
||||||
|
5BDD15EB1A79026A00B6CA2E /* mc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc.h; sourceTree = "<group>"; };
|
||||||
|
5BDD15EC1A79027600B6CA2E /* mc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mc.cpp; sourceTree = "<group>"; };
|
||||||
F0B204F718FD23B6005DA23F /* copy_mb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = copy_mb.h; sourceTree = "<group>"; };
|
F0B204F718FD23B6005DA23F /* copy_mb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = copy_mb.h; sourceTree = "<group>"; };
|
||||||
F0B204F818FD23BF005DA23F /* copy_mb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = copy_mb.cpp; sourceTree = "<group>"; };
|
F0B204F818FD23BF005DA23F /* copy_mb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = copy_mb.cpp; sourceTree = "<group>"; };
|
||||||
F556A8221906673900E156A8 /* arm_arch64_common_macro.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = arm_arch64_common_macro.S; path = arm64/arm_arch64_common_macro.S; sourceTree = "<group>"; };
|
F556A8221906673900E156A8 /* arm_arch64_common_macro.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = arm_arch64_common_macro.S; path = arm64/arm_arch64_common_macro.S; sourceTree = "<group>"; };
|
||||||
@@ -123,7 +125,7 @@
|
|||||||
4C3406BA18D96EA600DFA14A /* deblocking_common.h */,
|
4C3406BA18D96EA600DFA14A /* deblocking_common.h */,
|
||||||
4C3406BD18D96EA600DFA14A /* ls_defines.h */,
|
4C3406BD18D96EA600DFA14A /* ls_defines.h */,
|
||||||
4C3406BE18D96EA600DFA14A /* macros.h */,
|
4C3406BE18D96EA600DFA14A /* macros.h */,
|
||||||
4C3406BF18D96EA600DFA14A /* mc_common.h */,
|
5BDD15EB1A79026A00B6CA2E /* mc.h */,
|
||||||
4C3406C018D96EA600DFA14A /* measure_time.h */,
|
4C3406C018D96EA600DFA14A /* measure_time.h */,
|
||||||
4C3406C118D96EA600DFA14A /* typedefs.h */,
|
4C3406C118D96EA600DFA14A /* typedefs.h */,
|
||||||
5BA8F2BE19603F3500011CE4 /* wels_common_defs.h */,
|
5BA8F2BE19603F3500011CE4 /* wels_common_defs.h */,
|
||||||
@@ -143,6 +145,7 @@
|
|||||||
4C3406C518D96EA600DFA14A /* crt_util_safe_x.cpp */,
|
4C3406C518D96EA600DFA14A /* crt_util_safe_x.cpp */,
|
||||||
53C1C9BB193F0FB000404D8F /* expand_pic.cpp */,
|
53C1C9BB193F0FB000404D8F /* expand_pic.cpp */,
|
||||||
4C3406C618D96EA600DFA14A /* deblocking_common.cpp */,
|
4C3406C618D96EA600DFA14A /* deblocking_common.cpp */,
|
||||||
|
5BDD15EC1A79027600B6CA2E /* mc.cpp */,
|
||||||
4C3406C818D96EA600DFA14A /* WelsThreadLib.cpp */,
|
4C3406C818D96EA600DFA14A /* WelsThreadLib.cpp */,
|
||||||
);
|
);
|
||||||
path = src;
|
path = src;
|
||||||
@@ -253,6 +256,7 @@
|
|||||||
F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */,
|
F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */,
|
||||||
4C3406C918D96EA600DFA14A /* arm_arch_common_macro.S in Sources */,
|
4C3406C918D96EA600DFA14A /* arm_arch_common_macro.S in Sources */,
|
||||||
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */,
|
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */,
|
||||||
|
5BDD15ED1A79027600B6CA2E /* mc.cpp in Sources */,
|
||||||
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */,
|
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */,
|
||||||
4C3406CE18D96EA600DFA14A /* crt_util_safe_x.cpp in Sources */,
|
4C3406CE18D96EA600DFA14A /* crt_util_safe_x.cpp in Sources */,
|
||||||
F791965919D3BE2200F60C6B /* intra_pred_common.cpp in Sources */,
|
F791965919D3BE2200F60C6B /* intra_pred_common.cpp in Sources */,
|
||||||
|
|||||||
@@ -20,7 +20,6 @@
|
|||||||
4CE4469318BC5EAB0017DF25 /* fmo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467018BC5EAA0017DF25 /* fmo.cpp */; };
|
4CE4469318BC5EAB0017DF25 /* fmo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467018BC5EAA0017DF25 /* fmo.cpp */; };
|
||||||
4CE4469418BC5EAB0017DF25 /* get_intra_predictor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */; };
|
4CE4469418BC5EAB0017DF25 /* get_intra_predictor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */; };
|
||||||
4CE4469518BC5EAB0017DF25 /* manage_dec_ref.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */; };
|
4CE4469518BC5EAB0017DF25 /* manage_dec_ref.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */; };
|
||||||
4CE4469618BC5EAB0017DF25 /* mc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467318BC5EAA0017DF25 /* mc.cpp */; };
|
|
||||||
4CE4469718BC5EAB0017DF25 /* mem_align.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467418BC5EAA0017DF25 /* mem_align.cpp */; };
|
4CE4469718BC5EAB0017DF25 /* mem_align.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467418BC5EAA0017DF25 /* mem_align.cpp */; };
|
||||||
4CE4469818BC5EAB0017DF25 /* memmgr_nal_unit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */; };
|
4CE4469818BC5EAB0017DF25 /* memmgr_nal_unit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */; };
|
||||||
4CE4469918BC5EAB0017DF25 /* mv_pred.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */; };
|
4CE4469918BC5EAB0017DF25 /* mv_pred.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */; };
|
||||||
@@ -73,7 +72,6 @@
|
|||||||
4CE4465318BC5EAA0017DF25 /* get_intra_predictor.h */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = get_intra_predictor.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
|
4CE4465318BC5EAA0017DF25 /* get_intra_predictor.h */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = get_intra_predictor.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
|
||||||
4CE4465418BC5EAA0017DF25 /* manage_dec_ref.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = manage_dec_ref.h; sourceTree = "<group>"; };
|
4CE4465418BC5EAA0017DF25 /* manage_dec_ref.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = manage_dec_ref.h; sourceTree = "<group>"; };
|
||||||
4CE4465518BC5EAA0017DF25 /* mb_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mb_cache.h; sourceTree = "<group>"; };
|
4CE4465518BC5EAA0017DF25 /* mb_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mb_cache.h; sourceTree = "<group>"; };
|
||||||
4CE4465618BC5EAA0017DF25 /* mc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc.h; sourceTree = "<group>"; };
|
|
||||||
4CE4465718BC5EAA0017DF25 /* mem_align.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mem_align.h; sourceTree = "<group>"; };
|
4CE4465718BC5EAA0017DF25 /* mem_align.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mem_align.h; sourceTree = "<group>"; };
|
||||||
4CE4465818BC5EAA0017DF25 /* memmgr_nal_unit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memmgr_nal_unit.h; sourceTree = "<group>"; };
|
4CE4465818BC5EAA0017DF25 /* memmgr_nal_unit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memmgr_nal_unit.h; sourceTree = "<group>"; };
|
||||||
4CE4465918BC5EAA0017DF25 /* mv_pred.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mv_pred.h; sourceTree = "<group>"; };
|
4CE4465918BC5EAA0017DF25 /* mv_pred.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mv_pred.h; sourceTree = "<group>"; };
|
||||||
@@ -99,7 +97,6 @@
|
|||||||
4CE4467018BC5EAA0017DF25 /* fmo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fmo.cpp; sourceTree = "<group>"; };
|
4CE4467018BC5EAA0017DF25 /* fmo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fmo.cpp; sourceTree = "<group>"; };
|
||||||
4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = get_intra_predictor.cpp; sourceTree = "<group>"; };
|
4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = get_intra_predictor.cpp; sourceTree = "<group>"; };
|
||||||
4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = manage_dec_ref.cpp; sourceTree = "<group>"; };
|
4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = manage_dec_ref.cpp; sourceTree = "<group>"; };
|
||||||
4CE4467318BC5EAA0017DF25 /* mc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mc.cpp; sourceTree = "<group>"; tabWidth = 1; usesTabs = 0; wrapsLines = 1; };
|
|
||||||
4CE4467418BC5EAA0017DF25 /* mem_align.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mem_align.cpp; sourceTree = "<group>"; };
|
4CE4467418BC5EAA0017DF25 /* mem_align.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mem_align.cpp; sourceTree = "<group>"; };
|
||||||
4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memmgr_nal_unit.cpp; sourceTree = "<group>"; };
|
4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memmgr_nal_unit.cpp; sourceTree = "<group>"; };
|
||||||
4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mv_pred.cpp; sourceTree = "<group>"; };
|
4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mv_pred.cpp; sourceTree = "<group>"; };
|
||||||
@@ -218,7 +215,6 @@
|
|||||||
4CE4465318BC5EAA0017DF25 /* get_intra_predictor.h */,
|
4CE4465318BC5EAA0017DF25 /* get_intra_predictor.h */,
|
||||||
4CE4465418BC5EAA0017DF25 /* manage_dec_ref.h */,
|
4CE4465418BC5EAA0017DF25 /* manage_dec_ref.h */,
|
||||||
4CE4465518BC5EAA0017DF25 /* mb_cache.h */,
|
4CE4465518BC5EAA0017DF25 /* mb_cache.h */,
|
||||||
4CE4465618BC5EAA0017DF25 /* mc.h */,
|
|
||||||
4CE4465718BC5EAA0017DF25 /* mem_align.h */,
|
4CE4465718BC5EAA0017DF25 /* mem_align.h */,
|
||||||
4CE4465818BC5EAA0017DF25 /* memmgr_nal_unit.h */,
|
4CE4465818BC5EAA0017DF25 /* memmgr_nal_unit.h */,
|
||||||
4CE4465918BC5EAA0017DF25 /* mv_pred.h */,
|
4CE4465918BC5EAA0017DF25 /* mv_pred.h */,
|
||||||
@@ -256,7 +252,6 @@
|
|||||||
4CE4467018BC5EAA0017DF25 /* fmo.cpp */,
|
4CE4467018BC5EAA0017DF25 /* fmo.cpp */,
|
||||||
4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */,
|
4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */,
|
||||||
4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */,
|
4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */,
|
||||||
4CE4467318BC5EAA0017DF25 /* mc.cpp */,
|
|
||||||
4CE4467418BC5EAA0017DF25 /* mem_align.cpp */,
|
4CE4467418BC5EAA0017DF25 /* mem_align.cpp */,
|
||||||
4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */,
|
4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */,
|
||||||
4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */,
|
4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */,
|
||||||
@@ -375,7 +370,6 @@
|
|||||||
4CBC1B81194AC4E100214D9E /* intra_pred_aarch64_neon.S in Sources */,
|
4CBC1B81194AC4E100214D9E /* intra_pred_aarch64_neon.S in Sources */,
|
||||||
4CE4469018BC5EAB0017DF25 /* decoder_core.cpp in Sources */,
|
4CE4469018BC5EAB0017DF25 /* decoder_core.cpp in Sources */,
|
||||||
4CE447AE18BC6BE90017DF25 /* intra_pred_neon.S in Sources */,
|
4CE447AE18BC6BE90017DF25 /* intra_pred_neon.S in Sources */,
|
||||||
4CE4469618BC5EAB0017DF25 /* mc.cpp in Sources */,
|
|
||||||
4CE4469C18BC5EAB0017DF25 /* rec_mb.cpp in Sources */,
|
4CE4469C18BC5EAB0017DF25 /* rec_mb.cpp in Sources */,
|
||||||
4CE4468B18BC5EAB0017DF25 /* bit_stream.cpp in Sources */,
|
4CE4468B18BC5EAB0017DF25 /* bit_stream.cpp in Sources */,
|
||||||
4CE4468D18BC5EAB0017DF25 /* decode_mb_aux.cpp in Sources */,
|
4CE4468D18BC5EAB0017DF25 /* decode_mb_aux.cpp in Sources */,
|
||||||
|
|||||||
@@ -24,7 +24,6 @@
|
|||||||
4CE4471318BC605C0017DF25 /* encoder_data_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */; };
|
4CE4471318BC605C0017DF25 /* encoder_data_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */; };
|
||||||
4CE4471418BC605C0017DF25 /* encoder_ext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E318BC605C0017DF25 /* encoder_ext.cpp */; };
|
4CE4471418BC605C0017DF25 /* encoder_ext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E318BC605C0017DF25 /* encoder_ext.cpp */; };
|
||||||
4CE4471618BC605C0017DF25 /* get_intra_predictor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */; };
|
4CE4471618BC605C0017DF25 /* get_intra_predictor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */; };
|
||||||
4CE4471718BC605C0017DF25 /* mc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E618BC605C0017DF25 /* mc.cpp */; };
|
|
||||||
4CE4471818BC605C0017DF25 /* md.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E718BC605C0017DF25 /* md.cpp */; };
|
4CE4471818BC605C0017DF25 /* md.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E718BC605C0017DF25 /* md.cpp */; };
|
||||||
4CE4471918BC605C0017DF25 /* memory_align.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E818BC605C0017DF25 /* memory_align.cpp */; };
|
4CE4471918BC605C0017DF25 /* memory_align.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E818BC605C0017DF25 /* memory_align.cpp */; };
|
||||||
4CE4471A18BC605C0017DF25 /* mv_pred.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E918BC605C0017DF25 /* mv_pred.cpp */; };
|
4CE4471A18BC605C0017DF25 /* mv_pred.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E918BC605C0017DF25 /* mv_pred.cpp */; };
|
||||||
@@ -93,7 +92,6 @@
|
|||||||
4CE446B518BC605C0017DF25 /* extern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = extern.h; sourceTree = "<group>"; };
|
4CE446B518BC605C0017DF25 /* extern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = extern.h; sourceTree = "<group>"; };
|
||||||
4CE446B618BC605C0017DF25 /* get_intra_predictor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = get_intra_predictor.h; sourceTree = "<group>"; };
|
4CE446B618BC605C0017DF25 /* get_intra_predictor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = get_intra_predictor.h; sourceTree = "<group>"; };
|
||||||
4CE446B718BC605C0017DF25 /* mb_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mb_cache.h; sourceTree = "<group>"; };
|
4CE446B718BC605C0017DF25 /* mb_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mb_cache.h; sourceTree = "<group>"; };
|
||||||
4CE446B818BC605C0017DF25 /* mc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc.h; sourceTree = "<group>"; };
|
|
||||||
4CE446B918BC605C0017DF25 /* md.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = md.h; sourceTree = "<group>"; };
|
4CE446B918BC605C0017DF25 /* md.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = md.h; sourceTree = "<group>"; };
|
||||||
4CE446BA18BC605C0017DF25 /* memory_align.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memory_align.h; sourceTree = "<group>"; };
|
4CE446BA18BC605C0017DF25 /* memory_align.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memory_align.h; sourceTree = "<group>"; };
|
||||||
4CE446BB18BC605C0017DF25 /* mt_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mt_defs.h; sourceTree = "<group>"; };
|
4CE446BB18BC605C0017DF25 /* mt_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mt_defs.h; sourceTree = "<group>"; };
|
||||||
@@ -135,7 +133,6 @@
|
|||||||
4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = encoder_data_tables.cpp; sourceTree = "<group>"; };
|
4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = encoder_data_tables.cpp; sourceTree = "<group>"; };
|
||||||
4CE446E318BC605C0017DF25 /* encoder_ext.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = encoder_ext.cpp; sourceTree = "<group>"; };
|
4CE446E318BC605C0017DF25 /* encoder_ext.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = encoder_ext.cpp; sourceTree = "<group>"; };
|
||||||
4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = get_intra_predictor.cpp; sourceTree = "<group>"; };
|
4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = get_intra_predictor.cpp; sourceTree = "<group>"; };
|
||||||
4CE446E618BC605C0017DF25 /* mc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mc.cpp; sourceTree = "<group>"; };
|
|
||||||
4CE446E718BC605C0017DF25 /* md.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = md.cpp; sourceTree = "<group>"; };
|
4CE446E718BC605C0017DF25 /* md.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = md.cpp; sourceTree = "<group>"; };
|
||||||
4CE446E818BC605C0017DF25 /* memory_align.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memory_align.cpp; sourceTree = "<group>"; };
|
4CE446E818BC605C0017DF25 /* memory_align.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memory_align.cpp; sourceTree = "<group>"; };
|
||||||
4CE446E918BC605C0017DF25 /* mv_pred.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mv_pred.cpp; sourceTree = "<group>"; };
|
4CE446E918BC605C0017DF25 /* mv_pred.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mv_pred.cpp; sourceTree = "<group>"; };
|
||||||
@@ -276,7 +273,6 @@
|
|||||||
4CE446B518BC605C0017DF25 /* extern.h */,
|
4CE446B518BC605C0017DF25 /* extern.h */,
|
||||||
4CE446B618BC605C0017DF25 /* get_intra_predictor.h */,
|
4CE446B618BC605C0017DF25 /* get_intra_predictor.h */,
|
||||||
4CE446B718BC605C0017DF25 /* mb_cache.h */,
|
4CE446B718BC605C0017DF25 /* mb_cache.h */,
|
||||||
4CE446B818BC605C0017DF25 /* mc.h */,
|
|
||||||
4CE446B918BC605C0017DF25 /* md.h */,
|
4CE446B918BC605C0017DF25 /* md.h */,
|
||||||
4CE446BA18BC605C0017DF25 /* memory_align.h */,
|
4CE446BA18BC605C0017DF25 /* memory_align.h */,
|
||||||
4CE446BB18BC605C0017DF25 /* mt_defs.h */,
|
4CE446BB18BC605C0017DF25 /* mt_defs.h */,
|
||||||
@@ -328,7 +324,6 @@
|
|||||||
4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */,
|
4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */,
|
||||||
4CE446E318BC605C0017DF25 /* encoder_ext.cpp */,
|
4CE446E318BC605C0017DF25 /* encoder_ext.cpp */,
|
||||||
4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */,
|
4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */,
|
||||||
4CE446E618BC605C0017DF25 /* mc.cpp */,
|
|
||||||
4CE446E718BC605C0017DF25 /* md.cpp */,
|
4CE446E718BC605C0017DF25 /* md.cpp */,
|
||||||
4CE446E818BC605C0017DF25 /* memory_align.cpp */,
|
4CE446E818BC605C0017DF25 /* memory_align.cpp */,
|
||||||
4CE446E918BC605C0017DF25 /* mv_pred.cpp */,
|
4CE446E918BC605C0017DF25 /* mv_pred.cpp */,
|
||||||
@@ -455,7 +450,6 @@
|
|||||||
4CE4470E18BC605C0017DF25 /* au_set.cpp in Sources */,
|
4CE4470E18BC605C0017DF25 /* au_set.cpp in Sources */,
|
||||||
F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */,
|
F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */,
|
||||||
4CBC1B83194ACBB400214D9E /* intra_pred_aarch64_neon.S in Sources */,
|
4CBC1B83194ACBB400214D9E /* intra_pred_aarch64_neon.S in Sources */,
|
||||||
4CE4471718BC605C0017DF25 /* mc.cpp in Sources */,
|
|
||||||
F7E9994519EBD1E9009B1021 /* svc_set_mb_syn_cabac.cpp in Sources */,
|
F7E9994519EBD1E9009B1021 /* svc_set_mb_syn_cabac.cpp in Sources */,
|
||||||
F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */,
|
F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */,
|
||||||
4CE4472918BC605C0017DF25 /* svc_set_mb_syn_cavlc.cpp in Sources */,
|
4CE4472918BC605C0017DF25 /* svc_set_mb_syn_cavlc.cpp in Sources */,
|
||||||
|
|||||||
@@ -744,11 +744,7 @@
|
|||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\decoder\core\inc\mc.h"
|
RelativePath="..\..\..\common\inc\mc.h"
|
||||||
>
|
|
||||||
</File>
|
|
||||||
<File
|
|
||||||
RelativePath="..\..\..\common\inc\mc_common.h"
|
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
@@ -909,7 +905,7 @@
|
|||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\decoder\core\src\mc.cpp"
|
RelativePath="..\..\..\common\src\mc.cpp"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
|
|||||||
@@ -386,7 +386,7 @@
|
|||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\encoder\core\src\mc.cpp"
|
RelativePath="..\..\..\common\src\mc.cpp"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
@@ -563,11 +563,7 @@
|
|||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\encoder\core\inc\mc.h"
|
RelativePath="..\..\..\common\inc\mc.h"
|
||||||
>
|
|
||||||
</File>
|
|
||||||
<File
|
|
||||||
RelativePath="..\..\..\common\inc\mc_common.h"
|
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
|
|||||||
@@ -30,11 +30,36 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef MC_COMMON_H
|
#ifndef MC_H
|
||||||
#define MC_COMMON_H
|
#define MC_H
|
||||||
|
|
||||||
#include "typedefs.h"
|
#include "typedefs.h"
|
||||||
|
|
||||||
|
typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
|
int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight);
|
||||||
|
|
||||||
|
typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
|
int32_t iWidth, int32_t iHeight);
|
||||||
|
typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t,
|
||||||
|
int32_t, int32_t);
|
||||||
|
|
||||||
|
typedef struct TagMcFunc {
|
||||||
|
PWelsLumaHalfpelMcFunc pfLumaHalfpelHor;
|
||||||
|
PWelsLumaHalfpelMcFunc pfLumaHalfpelVer;
|
||||||
|
PWelsLumaHalfpelMcFunc pfLumaHalfpelCen;
|
||||||
|
PWelsMcFunc pMcChromaFunc;
|
||||||
|
|
||||||
|
PWelsMcFunc pMcLumaFunc;
|
||||||
|
PWelsSampleAveragingFunc pfSampleAveraging;
|
||||||
|
} SMcFunc;
|
||||||
|
|
||||||
|
namespace WelsCommon {
|
||||||
|
|
||||||
|
void InitMcFunc (SMcFunc* pMcFunc, uint32_t iCpu);
|
||||||
|
|
||||||
|
} // namespace WelsCommon
|
||||||
|
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif//__cplusplus
|
#endif//__cplusplus
|
||||||
@@ -272,4 +297,4 @@ void McChromaWidthEq8_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* p
|
|||||||
}
|
}
|
||||||
#endif//__cplusplus
|
#endif//__cplusplus
|
||||||
|
|
||||||
#endif//MC_COMMON_H
|
#endif//MC_H
|
||||||
@@ -41,8 +41,17 @@
|
|||||||
#include "mc.h"
|
#include "mc.h"
|
||||||
|
|
||||||
#include "cpu_core.h"
|
#include "cpu_core.h"
|
||||||
|
#include "ls_defines.h"
|
||||||
|
#include "macros.h"
|
||||||
|
|
||||||
namespace WelsDec {
|
typedef void (*PMcChromaWidthExtFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
|
const uint8_t* kpABCD, int32_t iHeight);
|
||||||
|
typedef void (*PWelsSampleWidthAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*,
|
||||||
|
int32_t, int32_t);
|
||||||
|
typedef void (*PWelsMcWidthHeightFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
|
int32_t iWidth, int32_t iHeight);
|
||||||
|
|
||||||
|
namespace WelsCommon {
|
||||||
|
|
||||||
/*------------------weight for chroma fraction pixel interpolation------------------*/
|
/*------------------weight for chroma fraction pixel interpolation------------------*/
|
||||||
//iA = (8 - dx) * (8 - dy);
|
//iA = (8 - dx) * (8 - dy);
|
||||||
@@ -84,9 +93,6 @@ static const uint8_t g_kuiABCD[8][8][4] = { //g_kA[dy][dx], g_kB[dy][dx], g_kC[d
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*PWelsMcWidthHeightFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iWidth, int32_t iHeight);
|
|
||||||
|
|
||||||
//***************************************************************************//
|
//***************************************************************************//
|
||||||
// C code implementation //
|
// C code implementation //
|
||||||
//***************************************************************************//
|
//***************************************************************************//
|
||||||
@@ -176,6 +182,7 @@ static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* p
|
|||||||
McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//horizontal filter to gain half sample, that is (2, 0) location in quarter sample
|
||||||
static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
int32_t iWidth,
|
int32_t iWidth,
|
||||||
int32_t iHeight) {
|
int32_t iHeight) {
|
||||||
@@ -189,6 +196,7 @@ static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//vertical filter to gain half sample, that is (0, 2) location in quarter sample
|
||||||
static inline void McHorVer02_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
static inline void McHorVer02_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
int32_t iWidth,
|
int32_t iWidth,
|
||||||
int32_t iHeight) {
|
int32_t iHeight) {
|
||||||
@@ -202,6 +210,7 @@ static inline void McHorVer02_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
|
||||||
static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
int32_t iWidth,
|
int32_t iWidth,
|
||||||
int32_t iHeight) {
|
int32_t iHeight) {
|
||||||
@@ -390,6 +399,14 @@ static inline void McHorVer22WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcSt
|
|||||||
McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
McHorVer22WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight);
|
McHorVer22WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight);
|
||||||
}
|
}
|
||||||
|
void McHorVer22Width9Or17Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
|
int32_t iWidth, int32_t iHeight) {
|
||||||
|
ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 22, 24, 16)
|
||||||
|
int32_t tmp1 = 2 * (iWidth - 8);
|
||||||
|
McHorVer22HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 48, iWidth, iHeight + 5);
|
||||||
|
McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)pTap, 48, pDst, iDstStride, iWidth - 1, iHeight);
|
||||||
|
McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
int32_t iWidth,
|
int32_t iWidth,
|
||||||
@@ -685,11 +702,41 @@ void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int
|
|||||||
McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
|
McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PixelAvg_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
|
||||||
|
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
|
||||||
|
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
|
||||||
|
PixelAvgWidthEq8_mmx,
|
||||||
|
PixelAvgWidthEq16_sse2
|
||||||
|
};
|
||||||
|
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //X86_ASM
|
#endif //X86_ASM
|
||||||
//***************************************************************************//
|
//***************************************************************************//
|
||||||
// NEON implementation //
|
// NEON implementation //
|
||||||
//***************************************************************************//
|
//***************************************************************************//
|
||||||
#if defined(HAVE_NEON)
|
#if defined(HAVE_NEON)
|
||||||
|
void McHorVer20Width9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
|
int32_t iWidth, int32_t iHeight) {
|
||||||
|
if (iWidth == 17)
|
||||||
|
McHorVer20Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
else //if (iWidth == 9)
|
||||||
|
McHorVer20Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
}
|
||||||
|
void McHorVer02Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
|
int32_t iWidth, int32_t iHeight) {
|
||||||
|
if (iWidth == 16)
|
||||||
|
McHorVer02Height17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
else //if (iWidth == 8)
|
||||||
|
McHorVer02Height9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
}
|
||||||
|
void McHorVer22Width9Or17Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
|
int32_t iWidth, int32_t iHeight) {
|
||||||
|
if (iWidth == 17)
|
||||||
|
McHorVer22Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
else //if (iWidth == 9)
|
||||||
|
McHorVer22Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
}
|
||||||
void McCopy_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
void McCopy_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
int32_t iWidth, int32_t iHeight) {
|
int32_t iWidth, int32_t iHeight) {
|
||||||
if (16 == iWidth)
|
if (16 == iWidth)
|
||||||
@@ -941,8 +988,38 @@ void McChroma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int3
|
|||||||
McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
|
McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void PixelAvg_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
|
||||||
|
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
|
||||||
|
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
|
||||||
|
PixStrideAvgWidthEq8_neon,
|
||||||
|
PixStrideAvgWidthEq16_neon
|
||||||
|
};
|
||||||
|
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAVE_NEON_AARCH64)
|
#if defined(HAVE_NEON_AARCH64)
|
||||||
|
void McHorVer20Width9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
|
int32_t iWidth, int32_t iHeight) {
|
||||||
|
if (iWidth == 17)
|
||||||
|
McHorVer20Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
else //if (iWidth == 9)
|
||||||
|
McHorVer20Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
}
|
||||||
|
void McHorVer02Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
|
int32_t iWidth, int32_t iHeight) {
|
||||||
|
if (iWidth == 16)
|
||||||
|
McHorVer02Height17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
else //if (iWidth == 8)
|
||||||
|
McHorVer02Height9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
}
|
||||||
|
void McHorVer22Width9Or17Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst,
|
||||||
|
int32_t iDstStride,
|
||||||
|
int32_t iWidth, int32_t iHeight) {
|
||||||
|
if (iWidth == 17)
|
||||||
|
McHorVer22Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
else //if (iWidth == 9)
|
||||||
|
McHorVer22Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
|
}
|
||||||
void McCopy_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
void McCopy_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
int32_t iWidth, int32_t iHeight) {
|
int32_t iWidth, int32_t iHeight) {
|
||||||
if (16 == iWidth)
|
if (16 == iWidth)
|
||||||
@@ -1194,33 +1271,58 @@ void McChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pD
|
|||||||
McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
|
McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void PixelAvg_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
|
||||||
|
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
|
||||||
|
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
|
||||||
|
PixStrideAvgWidthEq8_AArch64_neon,
|
||||||
|
PixStrideAvgWidthEq16_AArch64_neon
|
||||||
|
};
|
||||||
|
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void InitMcFunc (SMcFunc* pMcFunc, int32_t iCpu) {
|
void InitMcFunc (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
|
||||||
pMcFunc->pMcLumaFunc = McLuma_c;
|
pMcFuncs->pfLumaHalfpelHor = McHorVer20_c;
|
||||||
pMcFunc->pMcChromaFunc = McChroma_c;
|
pMcFuncs->pfLumaHalfpelVer = McHorVer02_c;
|
||||||
|
pMcFuncs->pfLumaHalfpelCen = McHorVer22_c;
|
||||||
|
pMcFuncs->pfSampleAveraging = PixelAvg_c;
|
||||||
|
pMcFuncs->pMcChromaFunc = McChroma_c;
|
||||||
|
pMcFuncs->pMcLumaFunc = McLuma_c;
|
||||||
|
|
||||||
#ifdef HAVE_NEON
|
|
||||||
if (iCpu & WELS_CPU_NEON) {
|
|
||||||
pMcFunc->pMcLumaFunc = McLuma_neon;
|
|
||||||
pMcFunc->pMcChromaFunc = McChroma_neon;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#ifdef HAVE_NEON_AARCH64
|
|
||||||
if (iCpu & WELS_CPU_NEON) {
|
|
||||||
pMcFunc->pMcLumaFunc = McLuma_AArch64_neon;
|
|
||||||
pMcFunc->pMcChromaFunc = McChroma_AArch64_neon;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#if defined (X86_ASM)
|
#if defined (X86_ASM)
|
||||||
if (iCpu & WELS_CPU_SSE2) {
|
if (uiCpuFlag & WELS_CPU_SSE2) {
|
||||||
pMcFunc->pMcLumaFunc = McLuma_sse2;
|
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
|
||||||
pMcFunc->pMcChromaFunc = McChroma_sse2;
|
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2;
|
||||||
|
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2;
|
||||||
|
pMcFuncs->pfSampleAveraging = PixelAvg_sse2;
|
||||||
|
pMcFuncs->pMcChromaFunc = McChroma_sse2;
|
||||||
|
pMcFuncs->pMcLumaFunc = McLuma_sse2;
|
||||||
}
|
}
|
||||||
if (iCpu & WELS_CPU_SSSE3) {
|
|
||||||
pMcFunc->pMcChromaFunc = McChroma_ssse3;
|
if (uiCpuFlag & WELS_CPU_SSSE3) {
|
||||||
|
pMcFuncs->pMcChromaFunc = McChroma_ssse3;
|
||||||
}
|
}
|
||||||
#endif //(X86_ASM)
|
#endif //(X86_ASM)
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace WelsDec
|
#if defined(HAVE_NEON)
|
||||||
|
if (uiCpuFlag & WELS_CPU_NEON) {
|
||||||
|
pMcFuncs->pMcLumaFunc = McLuma_neon;
|
||||||
|
pMcFuncs->pMcChromaFunc = McChroma_neon;
|
||||||
|
pMcFuncs->pfSampleAveraging = PixelAvg_neon;
|
||||||
|
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16
|
||||||
|
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16
|
||||||
|
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(HAVE_NEON_AARCH64)
|
||||||
|
if (uiCpuFlag & WELS_CPU_NEON) {
|
||||||
|
pMcFuncs->pMcLumaFunc = McLuma_AArch64_neon;
|
||||||
|
pMcFuncs->pMcChromaFunc = McChroma_AArch64_neon;
|
||||||
|
pMcFuncs->pfSampleAveraging = PixelAvg_AArch64_neon;
|
||||||
|
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16
|
||||||
|
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16
|
||||||
|
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
} // namespace WelsCommon
|
||||||
@@ -7,6 +7,7 @@ COMMON_CPP_SRCS=\
|
|||||||
$(COMMON_SRCDIR)/src/deblocking_common.cpp\
|
$(COMMON_SRCDIR)/src/deblocking_common.cpp\
|
||||||
$(COMMON_SRCDIR)/src/expand_pic.cpp\
|
$(COMMON_SRCDIR)/src/expand_pic.cpp\
|
||||||
$(COMMON_SRCDIR)/src/intra_pred_common.cpp\
|
$(COMMON_SRCDIR)/src/intra_pred_common.cpp\
|
||||||
|
$(COMMON_SRCDIR)/src/mc.cpp\
|
||||||
$(COMMON_SRCDIR)/src/sad_common.cpp\
|
$(COMMON_SRCDIR)/src/sad_common.cpp\
|
||||||
$(COMMON_SRCDIR)/src/utils.cpp\
|
$(COMMON_SRCDIR)/src/utils.cpp\
|
||||||
$(COMMON_SRCDIR)/src/welsCodecTrace.cpp\
|
$(COMMON_SRCDIR)/src/welsCodecTrace.cpp\
|
||||||
|
|||||||
@@ -55,6 +55,7 @@
|
|||||||
#include "crt_util_safe_x.h"
|
#include "crt_util_safe_x.h"
|
||||||
#include "mb_cache.h"
|
#include "mb_cache.h"
|
||||||
#include "expand_pic.h"
|
#include "expand_pic.h"
|
||||||
|
#include "mc.h"
|
||||||
|
|
||||||
namespace WelsDec {
|
namespace WelsDec {
|
||||||
#define MAX_PRED_MODE_ID_I16x16 3
|
#define MAX_PRED_MODE_ID_I16x16 3
|
||||||
@@ -142,13 +143,6 @@ uint8_t uiLongRefCount[LIST_A]; // dependend on ref pic module
|
|||||||
int32_t iMaxLongTermFrameIdx;
|
int32_t iMaxLongTermFrameIdx;
|
||||||
} SRefPic, *PRefPic;
|
} SRefPic, *PRefPic;
|
||||||
|
|
||||||
// Motion-compensation kernel taking a motion vector (iMvX, iMvY) and the
// iWidth x iHeight block size.
typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride,
                             uint8_t* pDst, int32_t iDstStride,
                             int16_t iMvX, int16_t iMvY,
                             int32_t iWidth, int32_t iHeight);

// Pair of motion-compensation entry points (luma, chroma).
typedef struct TagMcFunc {
  PWelsMcFunc pMcLumaFunc;    // luma motion compensation
  PWelsMcFunc pMcChromaFunc;  // chroma motion compensation
} SMcFunc;
|
|
||||||
|
|
||||||
typedef void (*PCopyFunc) (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
|
typedef void (*PCopyFunc) (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
|
||||||
typedef struct TagCopyFunc {
|
typedef struct TagCopyFunc {
|
||||||
PCopyFunc pCopyLumaFunc;
|
PCopyFunc pCopyLumaFunc;
|
||||||
|
|||||||
@@ -1,50 +0,0 @@
|
|||||||
/*!
|
|
||||||
* \copy
|
|
||||||
* Copyright (c) 2013, Cisco Systems
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in
|
|
||||||
* the documentation and/or other materials provided with the
|
|
||||||
* distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef WELS_MC_H__
|
|
||||||
#define WELS_MC_H__
|
|
||||||
|
|
||||||
#include "wels_const.h"
|
|
||||||
#include "macros.h"
|
|
||||||
#include "decoder_context.h"
|
|
||||||
#include "mc_common.h"
|
|
||||||
|
|
||||||
namespace WelsDec {
|
|
||||||
|
|
||||||
typedef void (*PMcChromaWidthExtFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
const uint8_t* kpABCD, int32_t iHeight);
|
|
||||||
|
|
||||||
void InitMcFunc (SMcFunc* pMcFunc, int32_t iCpu);
|
|
||||||
|
|
||||||
} // namespace WelsDec
|
|
||||||
|
|
||||||
#endif//WELS_MC_H__
|
|
||||||
@@ -13,7 +13,6 @@ DECODER_CPP_SRCS=\
|
|||||||
$(DECODER_SRCDIR)/core/src/fmo.cpp\
|
$(DECODER_SRCDIR)/core/src/fmo.cpp\
|
||||||
$(DECODER_SRCDIR)/core/src/get_intra_predictor.cpp\
|
$(DECODER_SRCDIR)/core/src/get_intra_predictor.cpp\
|
||||||
$(DECODER_SRCDIR)/core/src/manage_dec_ref.cpp\
|
$(DECODER_SRCDIR)/core/src/manage_dec_ref.cpp\
|
||||||
$(DECODER_SRCDIR)/core/src/mc.cpp\
|
|
||||||
$(DECODER_SRCDIR)/core/src/mem_align.cpp\
|
$(DECODER_SRCDIR)/core/src/mem_align.cpp\
|
||||||
$(DECODER_SRCDIR)/core/src/memmgr_nal_unit.cpp\
|
$(DECODER_SRCDIR)/core/src/memmgr_nal_unit.cpp\
|
||||||
$(DECODER_SRCDIR)/core/src/mv_pred.cpp\
|
$(DECODER_SRCDIR)/core/src/mv_pred.cpp\
|
||||||
|
|||||||
@@ -1,51 +0,0 @@
|
|||||||
/*!
|
|
||||||
* \copy
|
|
||||||
* Copyright (c) 2013, Cisco Systems
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in
|
|
||||||
* the documentation and/or other materials provided with the
|
|
||||||
* distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
//macroblock.h
|
|
||||||
#ifndef WELS_MC_H__
|
|
||||||
#define WELS_MC_H__
|
|
||||||
|
|
||||||
#include <string.h>
|
|
||||||
#include "typedefs.h"
|
|
||||||
#include "wels_const.h"
|
|
||||||
#include "macros.h"
|
|
||||||
#include "wels_func_ptr_def.h"
|
|
||||||
#include "mc_common.h"
|
|
||||||
|
|
||||||
/////////////////////luma MC//////////////////////////
|
|
||||||
//x y means dx(mv[0] & 3) and dy(mv[1] & 3)
|
|
||||||
|
|
||||||
namespace WelsEnc {
|
|
||||||
void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag);
|
|
||||||
|
|
||||||
}
|
|
||||||
#endif//WELS_MC_H__
|
|
||||||
@@ -44,6 +44,7 @@
|
|||||||
#include "expand_pic.h"
|
#include "expand_pic.h"
|
||||||
#include "rc.h"
|
#include "rc.h"
|
||||||
#include "IWelsVP.h"
|
#include "IWelsVP.h"
|
||||||
|
#include "mc.h"
|
||||||
|
|
||||||
namespace WelsEnc {
|
namespace WelsEnc {
|
||||||
|
|
||||||
@@ -74,25 +75,6 @@ typedef int32_t (*PQuantizationSkipFunc) (int16_t* pDct, int16_t iFF, int16_t i
|
|||||||
typedef int32_t (*PQuantizationHadamardFunc) (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct,
|
typedef int32_t (*PQuantizationHadamardFunc) (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct,
|
||||||
int16_t* pBlock);
|
int16_t* pBlock);
|
||||||
|
|
||||||
// Motion-compensation kernel taking a motion vector and the block size.
typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride,
                             uint8_t* pDst, int32_t iDstStride,
                             int16_t iMvX, int16_t iMvY,
                             int32_t iWidth, int32_t iHeight);

// Luma half-pel kernel operating on an iWidth x iHeight block.
typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride,
                                        uint8_t* pDst, int32_t iDstStride,
                                        int32_t iWidth, int32_t iHeight);
// Luma quarter-pel kernel; only the height is passed.
typedef void (*PWelsLumaQuarpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride,
                                        uint8_t* pDst, int32_t iDstStride,
                                        int32_t iHeight);
// Sample averaging kernel (pointer/stride triples followed by two int32_t sizes).
typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t,
                                          const uint8_t*, int32_t,
                                          const uint8_t*, int32_t,
                                          int32_t, int32_t);

// Table of motion-compensation entry points.
typedef struct TagMcFunc {
  PWelsLumaHalfpelMcFunc    pfLumaHalfpelHor;   // horizontal half-pel
  PWelsLumaHalfpelMcFunc    pfLumaHalfpelVer;   // vertical half-pel
  PWelsLumaHalfpelMcFunc    pfLumaHalfpelCen;   // centre half-pel
  PWelsMcFunc               pMcChromaFunc;      // chroma motion compensation

  PWelsMcFunc               pMcLumaFunc;        // luma motion compensation
  PWelsSampleAveragingFunc  pfSampleAveraging;  // block averaging
} SMcFunc;
|
|
||||||
|
|
||||||
typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc);
|
typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc);
|
||||||
typedef void (*PLumaDeblockingEQ4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
typedef void (*PLumaDeblockingEQ4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
|
||||||
typedef void (*PChromaDeblockingLT4Func) (uint8_t* iSampleCb, uint8_t* iSampleCr, int32_t iStride, int32_t iAlpha,
|
typedef void (*PChromaDeblockingLT4Func) (uint8_t* iSampleCb, uint8_t* iSampleCr, int32_t iStride, int32_t iAlpha,
|
||||||
|
|||||||
@@ -209,7 +209,7 @@ int32_t InitFunctionPointers (sWelsEncCtx* pEncCtx, SWelsSvcCodingParam* pParam,
|
|||||||
/* Motion compensation */
|
/* Motion compensation */
|
||||||
/*init pixel average function*/
|
/*init pixel average function*/
|
||||||
/*get one column or row pixel when refinement*/
|
/*get one column or row pixel when refinement*/
|
||||||
WelsInitMcFuncs (&pFuncList->sMcFuncs, uiCpuFlag);
|
InitMcFunc (&pFuncList->sMcFuncs, uiCpuFlag);
|
||||||
InitCoeffFunc (pFuncList,uiCpuFlag,pParam->iEntropyCodingModeFlag);
|
InitCoeffFunc (pFuncList,uiCpuFlag,pParam->iEntropyCodingModeFlag);
|
||||||
|
|
||||||
WelsInitEncodingFuncs (pFuncList, uiCpuFlag);
|
WelsInitEncodingFuncs (pFuncList, uiCpuFlag);
|
||||||
|
|||||||
@@ -1,891 +0,0 @@
|
|||||||
/*!
|
|
||||||
* \copy
|
|
||||||
* Copyright (c) 2009-2013, Cisco Systems
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in
|
|
||||||
* the documentation and/or other materials provided with the
|
|
||||||
* distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* \file mc.c
|
|
||||||
*
|
|
||||||
* \brief Interfaces implementation for motion compensation
|
|
||||||
*
|
|
||||||
* \date 03/17/2009 Created
|
|
||||||
*
|
|
||||||
*************************************************************************************
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "mc.h"
|
|
||||||
#include "cpu_core.h"
|
|
||||||
|
|
||||||
// Fixed-width averaging kernel: three pointer/stride pairs followed by one int32_t
// (the row count in the PixelAvgWidthEq*_c implementations).
typedef void (*PWelsSampleWidthAveragingFunc) (uint8_t*, int32_t,
                                               const uint8_t*, int32_t,
                                               const uint8_t*, int32_t,
                                               int32_t);
|
|
||||||
|
|
||||||
namespace WelsEnc {
|
|
||||||
/*------------------weight for chroma fraction pixel interpolation------------------*/
|
|
||||||
//kuiA = (8 - dx) * (8 - dy);
|
|
||||||
//kuiB = dx * (8 - dy);
|
|
||||||
//kuiC = (8 - dx) * dy;
|
|
||||||
//kuiD = dx * dy
|
|
||||||
// Chroma fractional-sample weights, indexed [dy][dx]; each entry is {A, B, C, D} with
//   A = (8 - dx) * (8 - dy),  B = dx * (8 - dy),  C = (8 - dx) * dy,  D = dx * dy.
static const uint8_t g_kuiABCD[8][8][4] = {
  /* dy = 0 */ { {64, 0,  0, 0}, {56,  8,  0,  0}, {48, 16,  0,  0}, {40, 24,  0,  0},
                 {32, 32, 0, 0}, {24, 40,  0,  0}, {16, 48,  0,  0}, { 8, 56,  0,  0} },
  /* dy = 1 */ { {56, 0,  8, 0}, {49,  7,  7,  1}, {42, 14,  6,  2}, {35, 21,  5,  3},
                 {28, 28, 4, 4}, {21, 35,  3,  5}, {14, 42,  2,  6}, { 7, 49,  1,  7} },
  /* dy = 2 */ { {48, 0, 16, 0}, {42,  6, 14,  2}, {36, 12, 12,  4}, {30, 18, 10,  6},
                 {24, 24, 8, 8}, {18, 30,  6, 10}, {12, 36,  4, 12}, { 6, 42,  2, 14} },
  /* dy = 3 */ { {40, 0, 24, 0}, {35,  5, 21,  3}, {30, 10, 18,  6}, {25, 15, 15,  9},
                 {20, 20, 12, 12}, {15, 25, 9, 15}, {10, 30,  6, 18}, { 5, 35,  3, 21} },
  /* dy = 4 */ { {32, 0, 32, 0}, {28,  4, 28,  4}, {24,  8, 24,  8}, {20, 12, 20, 12},
                 {16, 16, 16, 16}, {12, 20, 12, 20}, { 8, 24, 8, 24}, { 4, 28,  4, 28} },
  /* dy = 5 */ { {24, 0, 40, 0}, {21,  3, 35,  5}, {18,  6, 30, 10}, {15,  9, 25, 15},
                 {12, 12, 20, 20}, { 9, 15, 15, 25}, { 6, 18, 10, 30}, { 3, 21,  5, 35} },
  /* dy = 6 */ { {16, 0, 48, 0}, {14,  2, 42,  6}, {12,  4, 36, 12}, {10,  6, 30, 18},
                 { 8,  8, 24, 24}, { 6, 10, 18, 30}, { 4, 12, 12, 36}, { 2, 14,  6, 42} },
  /* dy = 7 */ { { 8, 0, 56, 0}, { 7,  1, 49,  7}, { 6,  2, 42, 14}, { 5,  3, 35, 21},
                 { 4,  4, 28, 28}, { 3,  5, 21, 35}, { 2,  6, 14, 42}, { 1,  7,  7, 49} }
};
|
|
||||||
|
|
||||||
//***************************************************************************//
|
|
||||||
// C code implementation //
|
|
||||||
//***************************************************************************//
|
|
||||||
// Copies iHeight rows of 4 bytes from pSrc to pDst, stepping each pointer by its own stride.
static inline void McCopyWidthEq4_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                     int32_t iHeight) {
  for (int32_t iRowsLeft = iHeight; iRowsLeft > 0; --iRowsLeft) {
    memcpy (pDst, pSrc, 4); // confirmed_safe_unsafe_usage
    pSrc += iSrcStride;
    pDst += iDstStride;
  }
}
|
|
||||||
|
|
||||||
// Copies iHeight rows of 8 bytes from pSrc to pDst, stepping each pointer by its own stride.
static inline void McCopyWidthEq8_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                     int32_t iHeight) {
  for (int32_t iRowsLeft = iHeight; iRowsLeft > 0; --iRowsLeft) {
    memcpy (pDst, pSrc, 8); // confirmed_safe_unsafe_usage
    pSrc += iSrcStride;
    pDst += iDstStride;
  }
}
|
|
||||||
// Copies iHeight rows of 16 bytes from pSrc to pDst, stepping each pointer by its own stride.
static inline void McCopyWidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight) {
  for (int32_t iRowsLeft = iHeight; iRowsLeft > 0; --iRowsLeft) {
    memcpy (pDst, pSrc, 16); // confirmed_safe_unsafe_usage
    pSrc += iSrcStride;
    pDst += iDstStride;
  }
}
|
|
||||||
|
|
||||||
//--------------------Luma sample MC------------------//
|
|
||||||
// Six-tap horizontal filter around pSrc: reads pSrc[-2] .. pSrc[3] and returns
// (p[-2] + p[3]) - 5 * (p[-1] + p[2]) + 20 * (p[0] + p[1]), unrounded and unclipped.
static inline int32_t HorFilter_c (const uint8_t* pSrc) {
  const int32_t kiOuter  = pSrc[-2] + pSrc[3];
  const int32_t kiMiddle = pSrc[-1] + pSrc[2];
  const int32_t kiInner  = pSrc[ 0] + pSrc[1];

  return kiOuter - 5 * kiMiddle + 20 * kiInner;
}
|
|
||||||
|
|
||||||
// Six-tap filter over 16-bit intermediates pSrc[0] .. pSrc[5]:
//   (p[0] + p[5]) - 5 * (p[1] + p[4]) + 20 * (p[2] + p[3]), unrounded and unclipped.
// The inputs are signed (the vertical pass that feeds this can produce negative
// values), so the weights are applied with multiplication: left-shifting a
// negative int is undefined behaviour before C++20.
static inline int32_t HorFilterInput16bit1_c (const int16_t* pSrc) {
  const int32_t kiOuter  = pSrc[0] + pSrc[5];
  const int32_t kiMiddle = pSrc[1] + pSrc[4];
  const int32_t kiInner  = pSrc[2] + pSrc[3];

  return kiOuter - 5 * kiMiddle + 20 * kiInner;
}
|
|
||||||
// Six-tap vertical filter around pSrc: reads the column samples at rows -2 .. +3
// (row pitch kiSrcStride) and returns
//   (r[-2] + r[3]) - 5 * (r[-1] + r[2]) + 20 * (r[0] + r[1]), unrounded and unclipped.
// The sum is formed in uint32_t and converted to int32_t on return.
static inline int32_t VerFilter_c (const uint8_t* pSrc, const int32_t kiSrcStride) {
  const int32_t kiTwoRows   = kiSrcStride + kiSrcStride;
  const int32_t kiThreeRows = kiTwoRows + kiSrcStride;
  const uint32_t kuiOuter   = pSrc[-kiTwoRows]   + pSrc[kiThreeRows];
  const uint32_t kuiMiddle  = pSrc[-kiSrcStride] + pSrc[kiTwoRows];
  const uint32_t kuiInner   = pSrc[0]            + pSrc[kiSrcStride];

  return (kuiOuter - (kuiMiddle * 5) + (kuiInner * 20));
}
|
|
||||||
|
|
||||||
// Rounded average of two 8-wide blocks: pDst[x] = (pSrcA[x] + pSrcB[x] + 1) >> 1
// for iHeight rows, each buffer advancing by its own stride.
static inline void PixelAvgWidthEq8_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
                                       const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight) {
  for (int32_t iRow = 0; iRow < iHeight; ++iRow) {
    for (int32_t iCol = 0; iCol < 8; ++iCol)
      pDst[iCol] = (pSrcA[iCol] + pSrcB[iCol] + 1) >> 1;
    pSrcA += iSrcAStride;
    pSrcB += iSrcBStride;
    pDst  += iDstStride;
  }
}
|
|
||||||
// Rounded average of two 16-wide blocks: pDst[x] = (pSrcA[x] + pSrcB[x] + 1) >> 1
// for iHeight rows, each buffer advancing by its own stride.
static inline void PixelAvgWidthEq16_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
                                        const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight) {
  for (int32_t iRow = 0; iRow < iHeight; ++iRow) {
    for (int32_t iCol = 0; iCol < 16; ++iCol)
      pDst[iCol] = (pSrcA[iCol] + pSrcB[iCol] + 1) >> 1;
    pSrcA += iSrcAStride;
    pSrcB += iSrcBStride;
    pDst  += iDstStride;
  }
}
|
|
||||||
|
|
||||||
//horizontal filter to gain half sample, that is (2, 0) location in quarter sample
|
|
||||||
static inline void McHorVer20WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iHeight) {
|
|
||||||
int32_t i, j;
|
|
||||||
for (i = 0; i < iHeight; i++) {
|
|
||||||
for (j = 0; j < 16; j++) {
|
|
||||||
pDst[j] = WelsClip1 ((HorFilter_c (pSrc + j) + 16) >> 5);
|
|
||||||
}
|
|
||||||
pDst += iDstStride;
|
|
||||||
pSrc += iSrcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//vertical filter to gain half sample, that is (0, 2) location in quarter sample
|
|
||||||
static inline void McHorVer02WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iHeight) {
|
|
||||||
int32_t i, j;
|
|
||||||
for (i = 0; i < iHeight; i++) {
|
|
||||||
for (j = 0; j < 16; j++) {
|
|
||||||
pDst[j] = WelsClip1 ((VerFilter_c (pSrc + j, iSrcStride) + 16) >> 5);
|
|
||||||
}
|
|
||||||
pDst += iDstStride;
|
|
||||||
pSrc += iSrcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
|
|
||||||
static inline void McHorVer22WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iHeight) {
|
|
||||||
int16_t pTmp[16 + 5] = {0}; //16
|
|
||||||
int32_t i, j, k;
|
|
||||||
|
|
||||||
for (i = 0; i < iHeight; i++) {
|
|
||||||
for (j = 0; j < 16 + 5; j++) {
|
|
||||||
pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
|
|
||||||
}
|
|
||||||
for (k = 0; k < 16; k++) {
|
|
||||||
pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10);
|
|
||||||
}
|
|
||||||
pSrc += iSrcStride;
|
|
||||||
pDst += iDstStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/////////////////////luma MC//////////////////////////
|
|
||||||
|
|
||||||
// Quarter-sample position (0, 1), 16 wide: averages the source block with the
// vertical half-sample plane. pTmp holds 16 rows of 16 bytes, so iHeight must
// not exceed 16.
static inline void McHorVer01WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)

  McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
|
|
||||||
// Quarter-sample position (0, 3), 16 wide: averages the source block one row
// down (pSrc + iSrcStride) with the vertical half-sample plane. pTmp holds 16
// rows of 16 bytes, so iHeight must not exceed 16.
static inline void McHorVer03WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)

  McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
}
|
|
||||||
// Quarter-sample position (1, 0), 16 wide: averages the source block with the
// horizontal half-sample plane. pTmp holds 16 rows of 16 bytes, so iHeight
// must not exceed 16.
static inline void McHorVer10WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)

  McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
|
|
||||||
// Quarter-sample position (1, 1), 16 wide: averages the horizontal half-sample
// plane with the vertical half-sample plane. pTmp is split into two 256-byte
// halves (16 rows of 16 bytes each), so iHeight must not exceed 16.
static inline void McHorVer11WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (1, 2), 16 wide: averages the vertical half-sample
// plane with the centre (2, 2) half-sample plane. pTmp is split into two
// 256-byte halves (16 rows of 16 bytes each), so iHeight must not exceed 16.
static inline void McHorVer12WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (1, 3), 16 wide: averages the horizontal half-sample
// plane taken one row down (pSrc + iSrcStride) with the vertical half-sample
// plane. pTmp is split into two 256-byte halves, so iHeight must not exceed 16.
static inline void McHorVer13WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (2, 1), 16 wide: averages the horizontal half-sample
// plane with the centre (2, 2) half-sample plane. pTmp is split into two
// 256-byte halves (16 rows of 16 bytes each), so iHeight must not exceed 16.
static inline void McHorVer21WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (2, 3), 16 wide: averages the horizontal half-sample
// plane taken one row down (pSrc + iSrcStride) with the centre (2, 2)
// half-sample plane. pTmp is split into two 256-byte halves, so iHeight must
// not exceed 16.
static inline void McHorVer23WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (3, 0), 16 wide: averages the source block one
// column right (pSrc + 1) with the horizontal half-sample plane. pTmp holds 16
// rows of 16 bytes, so iHeight must not exceed 16.
static inline void McHorVer30WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)

  McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
}
|
|
||||||
// Quarter-sample position (3, 1), 16 wide: averages the horizontal half-sample
// plane with the vertical half-sample plane taken one column right (pSrc + 1).
// pTmp is split into two 256-byte halves, so iHeight must not exceed 16.
static inline void McHorVer31WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (3, 2), 16 wide: averages the vertical half-sample
// plane taken one column right (pSrc + 1) with the centre (2, 2) half-sample
// plane. pTmp is split into two 256-byte halves, so iHeight must not exceed 16.
static inline void McHorVer32WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (3, 3), 16 wide: averages the horizontal half-sample
// plane taken one row down (pSrc + iSrcStride) with the vertical half-sample
// plane taken one column right (pSrc + 1). pTmp is split into two 256-byte
// halves, so iHeight must not exceed 16.
static inline void McHorVer33WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
|
|
||||||
static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iWidth,
|
|
||||||
int32_t iHeight) {
|
|
||||||
int32_t i, j;
|
|
||||||
for (i = 0; i < iHeight; i++) {
|
|
||||||
for (j = 0; j < iWidth; j++) {
|
|
||||||
pDst[j] = WelsClip1 ((HorFilter_c (pSrc + j) + 16) >> 5);
|
|
||||||
}
|
|
||||||
pDst += iDstStride;
|
|
||||||
pSrc += iSrcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//vertical filter to gain half sample, that is (0, 2) location in quarter sample
|
|
||||||
static inline void McHorVer02_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iWidth,
|
|
||||||
int32_t iHeight) {
|
|
||||||
int32_t i, j;
|
|
||||||
for (i = 0; i < iHeight; i++) {
|
|
||||||
for (j = 0; j < iWidth; j++) {
|
|
||||||
pDst[j] = WelsClip1 ((VerFilter_c (pSrc + j, iSrcStride) + 16) >> 5);
|
|
||||||
}
|
|
||||||
pDst += iDstStride;
|
|
||||||
pSrc += iSrcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
|
|
||||||
static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iWidth,
|
|
||||||
int32_t iHeight) {
|
|
||||||
int16_t pTmp[17 + 5] = {0}; //w+1
|
|
||||||
int32_t i, j, k;
|
|
||||||
|
|
||||||
for (i = 0; i < iHeight; i++) {
|
|
||||||
for (j = 0; j < iWidth + 5; j++) {
|
|
||||||
pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
|
|
||||||
}
|
|
||||||
for (k = 0; k < iWidth; k++) {
|
|
||||||
pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10);
|
|
||||||
}
|
|
||||||
pSrc += iSrcStride;
|
|
||||||
pDst += iDstStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
|
|
||||||
int32_t iHeight) {
|
|
||||||
int32_t i;
|
|
||||||
if (iWidth == 16)
|
|
||||||
McCopyWidthEq16_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else if (iWidth == 8)
|
|
||||||
McCopyWidthEq8_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else if (iWidth == 4)
|
|
||||||
McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else {
|
|
||||||
for (i = 0; i < iHeight; i++) {
|
|
||||||
memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
|
|
||||||
pDst += iDstStride;
|
|
||||||
pSrc += iSrcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void McChroma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight)
|
|
||||||
//pSrc has been added the offset of mv
|
|
||||||
{
|
|
||||||
const int32_t kiDx = iMvX & 0x07;
|
|
||||||
const int32_t kiDy = iMvY & 0x07;
|
|
||||||
|
|
||||||
if (0 == kiDx && 0 == kiDy) {
|
|
||||||
McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
|
|
||||||
} else {
|
|
||||||
const int32_t kiDA = g_kuiABCD[kiDy][kiDx][0];
|
|
||||||
const int32_t kiDB = g_kuiABCD[kiDy][kiDx][1];
|
|
||||||
const int32_t kiDC = g_kuiABCD[kiDy][kiDx][2];
|
|
||||||
const int32_t kiDD = g_kuiABCD[kiDy][kiDx][3];
|
|
||||||
|
|
||||||
int32_t i, j;
|
|
||||||
|
|
||||||
const uint8_t* pSrcNext = pSrc + iSrcStride;
|
|
||||||
|
|
||||||
for (i = 0; i < iHeight; i++) {
|
|
||||||
for (j = 0; j < iWidth; j++) {
|
|
||||||
pDst[j] = (kiDA * pSrc[j] + kiDB * pSrc[j + 1] + kiDC * pSrcNext[j] + kiDD * pSrcNext[j + 1] + 32) >> 6;
|
|
||||||
}
|
|
||||||
pDst += iDstStride;
|
|
||||||
pSrc = pSrcNext;
|
|
||||||
pSrcNext += iSrcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void McLuma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
|
|
||||||
static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
|
|
||||||
McCopyWidthEq16_c, McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c,
|
|
||||||
McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c,
|
|
||||||
McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c,
|
|
||||||
McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c
|
|
||||||
};
|
|
||||||
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
|
|
||||||
pWelsMcFuncWidthEq16[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
}
|
|
||||||
void PixelAvg_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
|
|
||||||
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
|
|
||||||
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
|
|
||||||
PixelAvgWidthEq8_c,
|
|
||||||
PixelAvgWidthEq16_c
|
|
||||||
};
|
|
||||||
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
|
|
||||||
}
|
|
||||||
//***************************************************************************//
|
|
||||||
// MMXEXT and SSE2 implementation //
|
|
||||||
//***************************************************************************//
|
|
||||||
#if defined(X86_ASM)
|
|
||||||
|
|
||||||
// Centre (2, 2) half-sample filter for an 8-pixel-wide block, SSE2.
// Pass 1 runs the horizontal-first routine over iHeight + 5 rows, starting two
// pixels left of pSrc, into the 16-bit scratch pTap (16 bytes per row).
// Pass 2 runs the vertical-last routine over pTap to produce iHeight rows.
// pTap has 21 rows, so iHeight must not exceed 16.
static inline void McHorVer22WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 21, 8, 16)
  McHorVer22Width8HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 16, iHeight + 5);
  McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)pTap, 16, pDst, iDstStride, 8, iHeight);
}
|
|
||||||
|
|
||||||
//2010.2.5
|
|
||||||
|
|
||||||
// Vertical half-sample filter for a 16-pixel-wide block, SSE2: runs the
// 8-wide routine on the left half and again on the right half (offset 8).
static inline void McHorVer02WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* PDst, int32_t iDstStride,
    int32_t iHeight) {
  McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, PDst, iDstStride, iHeight);
  McHorVer02WidthEq8_sse2 (&pSrc[8], iSrcStride, &PDst[8], iDstStride, iHeight);
}
|
|
||||||
// Centre (2, 2) half-sample filter for a 16-pixel-wide block, SSE2: runs the
// 8-wide routine on the left half and again on the right half (offset 8).
static inline void McHorVer22WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
  McHorVer22WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight);
}
|
|
||||||
// Centre (2, 2) half-sample filter for the 9- or 17-wide blocks named in the
// function, SSE2.
// The horizontal-first pass fills the 16-bit scratch pTap (48 bytes per row)
// for iHeight + 5 rows. The vertical pass is then run twice: once with the
// aligned routine for width iWidth - 1, and once with the unaligned routine on
// the last 8 columns (byte offset 2 * (iWidth - 8) into pTap), which overlap
// the first call's output.
// pTap has 22 rows, so iHeight must not exceed 17.
void McHorVer22Width9Or17Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iWidth,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 22, 24, 16)
  int32_t tmp1 = 2 * (iWidth - 8); // byte offset of the last 8 int16 columns
  McHorVer22HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 48, iWidth, iHeight + 5);
  McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)pTap, 48, pDst, iDstStride, iWidth - 1, iHeight);
  McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight);
}
|
|
||||||
|
|
||||||
// Quarter-sample position (0, 1), 16 wide, SSE2: averages the source block
// with the vertical half-sample plane. pTmp holds 16 rows of 16 bytes, so
// iHeight must not exceed 16.
static inline void McHorVer01WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)

  McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
|
|
||||||
// Quarter-sample position (0, 3), 16 wide, SSE2: averages the source block one
// row down (pSrc + iSrcStride) with the vertical half-sample plane. pTmp holds
// 16 rows of 16 bytes, so iHeight must not exceed 16.
static inline void McHorVer03WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)

  McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
}
|
|
||||||
// Quarter-sample position (1, 0), 16 wide, SSE2: averages the source block
// with the horizontal half-sample plane. pTmp holds 16 rows of 16 bytes, so
// iHeight must not exceed 16.
static inline void McHorVer10WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)

  McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
|
|
||||||
// Quarter-sample position (1, 1), 16 wide, SSE2: averages the horizontal
// half-sample plane with the vertical half-sample plane. pTmp is split into
// two 256-byte halves (16 rows of 16 bytes each), so iHeight must not exceed 16.
static inline void McHorVer11WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (1, 2), 16 wide, SSE2: averages the vertical
// half-sample plane with the centre (2, 2) half-sample plane. pTmp is split
// into two 256-byte halves, so iHeight must not exceed 16.
static inline void McHorVer12WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (1, 3), 16 wide, SSE2: averages the horizontal
// half-sample plane taken one row down (pSrc + iSrcStride) with the vertical
// half-sample plane. pTmp is split into two 256-byte halves, so iHeight must
// not exceed 16.
static inline void McHorVer13WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (2, 1), 16 wide, SSE2: averages the horizontal
// half-sample plane with the centre (2, 2) half-sample plane. pTmp is split
// into two 256-byte halves, so iHeight must not exceed 16.
static inline void McHorVer21WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (2, 3), 16 wide, SSE2: averages the horizontal
// half-sample plane taken one row down (pSrc + iSrcStride) with the centre
// (2, 2) half-sample plane. pTmp is split into two 256-byte halves, so iHeight
// must not exceed 16.
static inline void McHorVer23WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (3, 0), 16 wide, SSE2: averages the source block one
// column right (pSrc + 1) with the horizontal half-sample plane. pTmp holds 16
// rows of 16 bytes, so iHeight must not exceed 16.
static inline void McHorVer30WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)

  McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
}
|
|
||||||
// Quarter-sample position (3, 1), 16 wide, SSE2: averages the horizontal
// half-sample plane with the vertical half-sample plane taken one column right
// (pSrc + 1). pTmp is split into two 256-byte halves, so iHeight must not
// exceed 16.
static inline void McHorVer31WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (3, 2), 16 wide, SSE2: averages the vertical
// half-sample plane taken one column right (pSrc + 1) with the centre (2, 2)
// half-sample plane. pTmp is split into two 256-byte halves, so iHeight must
// not exceed 16.
static inline void McHorVer32WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
// Quarter-sample position (3, 3), 16 wide, SSE2: averages the horizontal
// half-sample plane taken one row down (pSrc + iSrcStride) with the vertical
// half-sample plane taken one column right (pSrc + 1). pTmp is split into two
// 256-byte halves, so iHeight must not exceed 16.
static inline void McHorVer33WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
    int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)

  McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
|
|
||||||
|
|
||||||
static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iWidth, int32_t iHeight) {
|
|
||||||
int32_t i;
|
|
||||||
if (iWidth == 16)
|
|
||||||
McCopyWidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else if (iWidth == 8)
|
|
||||||
McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else if (iWidth == 4)
|
|
||||||
McCopyWidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else {
|
|
||||||
for (i = 0; i < iHeight; i++) {
|
|
||||||
memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
|
|
||||||
pDst += iDstStride;
|
|
||||||
pSrc += iSrcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Signature shared by the fixed-width chroma interpolation routines: source
// and destination with their strides, a pointer to the four interpolation
// weights (pABCD), and the block height.
typedef void (*McChromaWidthEqx) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                  const uint8_t* pABCD, int32_t iHeight);
|
|
||||||
void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
|
|
||||||
const int32_t kiD8x = iMvX & 0x07;
|
|
||||||
const int32_t kiD8y = iMvY & 0x07;
|
|
||||||
static const McChromaWidthEqx kpfFuncs[2] = {
|
|
||||||
McChromaWidthEq4_mmx,
|
|
||||||
McChromaWidthEq8_sse2
|
|
||||||
};
|
|
||||||
|
|
||||||
if (0 == kiD8x && 0 == kiD8y) {
|
|
||||||
McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
|
|
||||||
} else {
|
|
||||||
kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
|
|
||||||
const int32_t kiD8x = iMvX & 0x07;
|
|
||||||
const int32_t kiD8y = iMvY & 0x07;
|
|
||||||
|
|
||||||
static const McChromaWidthEqx kpfFuncs[2] = {
|
|
||||||
McChromaWidthEq4_mmx,
|
|
||||||
McChromaWidthEq8_ssse3
|
|
||||||
};
|
|
||||||
if (0 == kiD8x && 0 == kiD8y) {
|
|
||||||
McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
|
|
||||||
} else {
|
|
||||||
kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
void McLuma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
|
|
||||||
static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
|
|
||||||
McCopyWidthEq16_sse2, McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2,
|
|
||||||
McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2,
|
|
||||||
McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2,
|
|
||||||
McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2
|
|
||||||
};
|
|
||||||
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
|
|
||||||
pWelsMcFuncWidthEq16_sse2[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
}
|
|
||||||
void PixelAvg_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
|
|
||||||
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
|
|
||||||
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
|
|
||||||
PixelAvgWidthEq8_mmx,
|
|
||||||
PixelAvgWidthEq16_sse2
|
|
||||||
};
|
|
||||||
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
|
|
||||||
}
|
|
||||||
#endif //X86_ASM
|
|
||||||
|
|
||||||
//***************************************************************************//
|
|
||||||
// NEON implementation //
|
|
||||||
//***************************************************************************//
|
|
||||||
#if defined(HAVE_NEON)
|
|
||||||
void McHorVer20Width9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iWidth, int32_t iHeight) {
|
|
||||||
if (iWidth == 17)
|
|
||||||
McHorVer20Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else //if (iWidth == 9)
|
|
||||||
McHorVer20Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
}
|
|
||||||
void McHorVer02Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iWidth, int32_t iHeight) {
|
|
||||||
if (iWidth == 16)
|
|
||||||
McHorVer02Height17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else //if (iWidth == 8)
|
|
||||||
McHorVer02Height9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
}
|
|
||||||
void McHorVer22Width9Or17Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iWidth, int32_t iHeight) {
|
|
||||||
if (iWidth == 17)
|
|
||||||
McHorVer22Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else //if (iWidth == 9)
|
|
||||||
McHorVer22Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
}
|
|
||||||
// Quarter-sample position (1, 1), 16 wide, NEON: averages the horizontal
// half-sample plane with the vertical half-sample plane. pTmp is split into
// two 256-byte halves (16 rows of 16 bytes each), so iHeight must not exceed 16.
void EncMcHorVer11_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
  McHorVer20WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
|
|
||||||
// Quarter-sample position (1, 2), 16 wide, NEON: averages the vertical
// half-sample plane with the centre (2, 2) half-sample plane. pTmp is split
// into two 256-byte halves, so iHeight must not exceed 16.
void EncMcHorVer12_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
  McHorVer02WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
|
|
||||||
// Quarter-sample position (1, 3), 16 wide, NEON: averages the horizontal
// half-sample plane taken one row down (pSrc + iSrcStride) with the vertical
// half-sample plane. pTmp is split into two 256-byte halves, so iHeight must
// not exceed 16.
void EncMcHorVer13_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
  McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
|
|
||||||
// Quarter-sample position (2, 1), 16 wide, NEON: averages the horizontal
// half-sample plane with the centre (2, 2) half-sample plane. pTmp is split
// into two 256-byte halves, so iHeight must not exceed 16.
void EncMcHorVer21_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
  McHorVer20WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
|
|
||||||
// Quarter-sample position (2, 3), 16 wide, NEON: averages the horizontal
// half-sample plane taken one row down (pSrc + iSrcStride) with the centre
// (2, 2) half-sample plane. pTmp is split into two 256-byte halves, so iHeight
// must not exceed 16.
void EncMcHorVer23_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
  McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
|
|
||||||
// Quarter-sample position (3, 1), 16 wide, NEON: averages the horizontal
// half-sample plane with the vertical half-sample plane taken one column right
// (pSrc + 1). pTmp is split into two 256-byte halves, so iHeight must not
// exceed 16.
void EncMcHorVer31_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
  McHorVer20WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
|
|
||||||
// Quarter-sample position (3, 2), 16 wide, NEON: averages the vertical
// half-sample plane taken one column right (pSrc + 1) with the centre (2, 2)
// half-sample plane. pTmp is split into two 256-byte halves, so iHeight must
// not exceed 16.
void EncMcHorVer32_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
  McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
  McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
|
|
||||||
// Quarter-sample position (3, 3), 16 wide, NEON: averages the horizontal
// half-sample plane taken one row down (pSrc + iSrcStride) with the vertical
// half-sample plane taken one column right (pSrc + 1). pTmp is split into two
// 256-byte halves, so iHeight must not exceed 16.
void EncMcHorVer33_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
  McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
  McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
  PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
}
|
|
||||||
void EncMcChroma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
|
|
||||||
const int32_t kiD8x = iMvX & 0x07;
|
|
||||||
const int32_t kiD8y = iMvY & 0x07;
|
|
||||||
if (0 == kiD8x && 0 == kiD8y) {
|
|
||||||
if (8 == iWidth)
|
|
||||||
McCopyWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else // iWidth == 4
|
|
||||||
McCopyWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
} else {
|
|
||||||
if (8 == iWidth)
|
|
||||||
McChromaWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
|
|
||||||
else //if(4 == iWidth)
|
|
||||||
McChromaWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void EncMcLuma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
|
|
||||||
static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { //[x][y]
|
|
||||||
McCopyWidthEq16_neon, McHorVer10WidthEq16_neon, McHorVer20WidthEq16_neon, McHorVer30WidthEq16_neon,
|
|
||||||
McHorVer01WidthEq16_neon, EncMcHorVer11_neon, EncMcHorVer21_neon, EncMcHorVer31_neon,
|
|
||||||
McHorVer02WidthEq16_neon, EncMcHorVer12_neon, McHorVer22WidthEq16_neon, EncMcHorVer32_neon,
|
|
||||||
McHorVer03WidthEq16_neon, EncMcHorVer13_neon, EncMcHorVer23_neon, EncMcHorVer33_neon
|
|
||||||
};
|
|
||||||
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
|
|
||||||
pWelsMcFuncWidthEq16_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
}
|
|
||||||
void PixelAvg_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
|
|
||||||
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
|
|
||||||
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
|
|
||||||
PixStrideAvgWidthEq8_neon,
|
|
||||||
PixStrideAvgWidthEq16_neon
|
|
||||||
};
|
|
||||||
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(HAVE_NEON_AARCH64)
|
|
||||||
void McHorVer20Width9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iWidth, int32_t iHeight) {
|
|
||||||
if (iWidth == 17)
|
|
||||||
McHorVer20Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else //if (iWidth == 9)
|
|
||||||
McHorVer20Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
}
|
|
||||||
void McHorVer02Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iWidth, int32_t iHeight) {
|
|
||||||
if (iWidth == 16)
|
|
||||||
McHorVer02Height17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else //if (iWidth == 8)
|
|
||||||
McHorVer02Height9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
}
|
|
||||||
void McHorVer22Width9Or17Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst,
|
|
||||||
int32_t iDstStride,
|
|
||||||
int32_t iWidth, int32_t iHeight) {
|
|
||||||
if (iWidth == 17)
|
|
||||||
McHorVer22Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else //if (iWidth == 9)
|
|
||||||
McHorVer22Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
}
|
|
||||||
void EncMcHorVer11_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iHeight) {
|
|
||||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
|
||||||
McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
|
|
||||||
McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
|
||||||
PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
|
||||||
}
|
|
||||||
void EncMcHorVer12_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iHeight) {
|
|
||||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
|
||||||
McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
|
|
||||||
McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
|
||||||
PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
|
||||||
}
|
|
||||||
void EncMcHorVer13_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iHeight) {
|
|
||||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
|
||||||
McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
|
||||||
McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
|
||||||
PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
|
||||||
}
|
|
||||||
void EncMcHorVer21_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iHeight) {
|
|
||||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
|
||||||
McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
|
|
||||||
McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
|
||||||
PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
|
||||||
}
|
|
||||||
void EncMcHorVer23_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iHeight) {
|
|
||||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
|
||||||
McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
|
||||||
McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
|
||||||
PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
|
||||||
}
|
|
||||||
void EncMcHorVer31_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iHeight) {
|
|
||||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
|
||||||
McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
|
|
||||||
McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
|
|
||||||
PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
|
||||||
}
|
|
||||||
void EncMcHorVer32_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iHeight) {
|
|
||||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
|
||||||
McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
|
|
||||||
McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
|
||||||
PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
|
||||||
}
|
|
||||||
void EncMcHorVer33_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int32_t iHeight) {
|
|
||||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
|
||||||
McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
|
||||||
McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
|
|
||||||
PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
|
||||||
}
|
|
||||||
void EncMcChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
|
|
||||||
const int32_t kiD8x = iMvX & 0x07;
|
|
||||||
const int32_t kiD8y = iMvY & 0x07;
|
|
||||||
if (0 == kiD8x && 0 == kiD8y) {
|
|
||||||
if (8 == iWidth)
|
|
||||||
McCopyWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
else // iWidth == 4
|
|
||||||
McCopyWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
} else {
|
|
||||||
if (8 == iWidth)
|
|
||||||
McChromaWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
|
|
||||||
else //if(4 == iWidth)
|
|
||||||
McChromaWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void EncMcLuma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
|
||||||
int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
|
|
||||||
static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_AArch64_neon[16] = { //[x][y]
|
|
||||||
McCopyWidthEq16_AArch64_neon, McHorVer10WidthEq16_AArch64_neon, McHorVer20WidthEq16_AArch64_neon, McHorVer30WidthEq16_AArch64_neon,
|
|
||||||
McHorVer01WidthEq16_AArch64_neon, EncMcHorVer11_AArch64_neon, EncMcHorVer21_AArch64_neon, EncMcHorVer31_AArch64_neon,
|
|
||||||
McHorVer02WidthEq16_AArch64_neon, EncMcHorVer12_AArch64_neon, McHorVer22WidthEq16_AArch64_neon, EncMcHorVer32_AArch64_neon,
|
|
||||||
McHorVer03WidthEq16_AArch64_neon, EncMcHorVer13_AArch64_neon, EncMcHorVer23_AArch64_neon, EncMcHorVer33_AArch64_neon
|
|
||||||
};
|
|
||||||
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
|
|
||||||
pWelsMcFuncWidthEq16_AArch64_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
|
||||||
}
|
|
||||||
void PixelAvg_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
|
|
||||||
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
|
|
||||||
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
|
|
||||||
PixStrideAvgWidthEq8_AArch64_neon,
|
|
||||||
PixStrideAvgWidthEq16_AArch64_neon
|
|
||||||
};
|
|
||||||
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Fills the motion-compensation function table.
// All six entries are first set to the portable C routines. Afterwards, for
// each instruction-set family compiled in (X86_ASM, HAVE_NEON,
// HAVE_NEON_AARCH64), the entries are overwritten when the matching bit is set
// in uiCpuFlag. Later sections override earlier ones.
//   pMcFuncs  - table to fill; dereferenced unconditionally.
//   uiCpuFlag - bit mask of WELS_CPU_* capability flags.
void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
  SMcFunc& sTable = *pMcFuncs;

  // Portable defaults.
  sTable.pMcLumaFunc       = McLuma_c;
  sTable.pMcChromaFunc     = McChroma_c;
  sTable.pfSampleAveraging = PixelAvg_c;
  sTable.pfLumaHalfpelHor  = McHorVer20_c;
  sTable.pfLumaHalfpelVer  = McHorVer02_c;
  sTable.pfLumaHalfpelCen  = McHorVer22_c;

#if defined (X86_ASM)
  if (uiCpuFlag & WELS_CPU_SSE2) {
    sTable.pMcLumaFunc       = McLuma_sse2;
    sTable.pMcChromaFunc     = McChroma_sse2;
    sTable.pfSampleAveraging = PixelAvg_sse2;
    sTable.pfLumaHalfpelHor  = McHorVer20Width9Or17_sse2;
    sTable.pfLumaHalfpelVer  = McHorVer02Height9Or17_sse2;
    sTable.pfLumaHalfpelCen  = McHorVer22Width9Or17Height9Or17_sse2;
  }

  // Checked independently of SSE2; replaces only the chroma routine.
  if (uiCpuFlag & WELS_CPU_SSSE3) {
    sTable.pMcChromaFunc = McChroma_ssse3;
  }
#endif //(X86_ASM)

#if defined(HAVE_NEON)
  if (uiCpuFlag & WELS_CPU_NEON) {
    sTable.pMcLumaFunc       = EncMcLuma_neon;
    sTable.pMcChromaFunc     = EncMcChroma_neon;
    sTable.pfSampleAveraging = PixelAvg_neon;
    sTable.pfLumaHalfpelHor  = McHorVer20Width9Or17_neon;             // width argument is block width + 1 (8/16)
    sTable.pfLumaHalfpelVer  = McHorVer02Height9Or17_neon;            // height argument is block height + 1 (8/16)
    sTable.pfLumaHalfpelCen  = McHorVer22Width9Or17Height9Or17_neon;  // width + 1 and height + 1
  }
#endif

#if defined(HAVE_NEON_AARCH64)
  if (uiCpuFlag & WELS_CPU_NEON) {
    sTable.pMcLumaFunc       = EncMcLuma_AArch64_neon;
    sTable.pMcChromaFunc     = EncMcChroma_AArch64_neon;
    sTable.pfSampleAveraging = PixelAvg_AArch64_neon;
    sTable.pfLumaHalfpelHor  = McHorVer20Width9Or17_AArch64_neon;             // width argument is block width + 1 (8/16)
    sTable.pfLumaHalfpelVer  = McHorVer02Height9Or17_AArch64_neon;            // height argument is block height + 1 (8/16)
    sTable.pfLumaHalfpelCen  = McHorVer22Width9Or17Height9Or17_AArch64_neon;  // width + 1 and height + 1
  }
#endif
}
|
|
||||||
}
|
|
||||||
@@ -8,7 +8,6 @@ ENCODER_CPP_SRCS=\
|
|||||||
$(ENCODER_SRCDIR)/core/src/encoder_data_tables.cpp\
|
$(ENCODER_SRCDIR)/core/src/encoder_data_tables.cpp\
|
||||||
$(ENCODER_SRCDIR)/core/src/encoder_ext.cpp\
|
$(ENCODER_SRCDIR)/core/src/encoder_ext.cpp\
|
||||||
$(ENCODER_SRCDIR)/core/src/get_intra_predictor.cpp\
|
$(ENCODER_SRCDIR)/core/src/get_intra_predictor.cpp\
|
||||||
$(ENCODER_SRCDIR)/core/src/mc.cpp\
|
|
||||||
$(ENCODER_SRCDIR)/core/src/md.cpp\
|
$(ENCODER_SRCDIR)/core/src/md.cpp\
|
||||||
$(ENCODER_SRCDIR)/core/src/memory_align.cpp\
|
$(ENCODER_SRCDIR)/core/src/memory_align.cpp\
|
||||||
$(ENCODER_SRCDIR)/core/src/mv_pred.cpp\
|
$(ENCODER_SRCDIR)/core/src/mv_pred.cpp\
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include "codec_def.h"
|
#include "codec_def.h"
|
||||||
|
#include "macros.h"
|
||||||
#include "mc.h"
|
#include "mc.h"
|
||||||
#include "cpu.h"
|
#include "cpu.h"
|
||||||
using namespace WelsDec;
|
using namespace WelsCommon;
|
||||||
|
|
||||||
#include "mc_test_common.h"
|
#include "mc_test_common.h"
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,9 @@
|
|||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include "codec_def.h"
|
#include "codec_def.h"
|
||||||
|
#include "macros.h"
|
||||||
#include "mc.h"
|
#include "mc.h"
|
||||||
#include "cpu.h"
|
#include "cpu.h"
|
||||||
using namespace WelsEnc;
|
using namespace WelsCommon;
|
||||||
|
|
||||||
#define InitMcFunc WelsInitMcFuncs
|
|
||||||
|
|
||||||
#include "mc_test_common.h"
|
#include "mc_test_common.h"
|
||||||
|
|
||||||
@@ -27,7 +26,7 @@ TEST (EncMcAvg, PixelAvg) {
|
|||||||
int32_t width = 8 << w;
|
int32_t width = 8 << w;
|
||||||
int32_t height = 16;
|
int32_t height = 16;
|
||||||
uint32_t uiCpuFlag = k == 0 ? 0 : WelsCPUFeatureDetect (NULL);
|
uint32_t uiCpuFlag = k == 0 ? 0 : WelsCPUFeatureDetect (NULL);
|
||||||
WelsInitMcFuncs (&sMcFunc, uiCpuFlag);
|
InitMcFunc (&sMcFunc, uiCpuFlag);
|
||||||
uint8_t uSrc1[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE];
|
uint8_t uSrc1[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE];
|
||||||
uint8_t uSrc2[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE];
|
uint8_t uSrc2[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE];
|
||||||
ENFORCE_STACK_ALIGN_2D (uint8_t, uDstAnchor, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16);
|
ENFORCE_STACK_ALIGN_2D (uint8_t, uDstAnchor, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16);
|
||||||
@@ -76,7 +75,7 @@ TEST (EncMcHalfpel, LumaHalfpel) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
uint32_t uiCpuFlag = k == 0 ? 0 : WelsCPUFeatureDetect (NULL);
|
uint32_t uiCpuFlag = k == 0 ? 0 : WelsCPUFeatureDetect (NULL);
|
||||||
WelsInitMcFuncs (&sMcFunc, uiCpuFlag);
|
InitMcFunc (&sMcFunc, uiCpuFlag);
|
||||||
|
|
||||||
MCHalfPelFilterAnchor (uAnchors[1], uAnchors[2], uAnchors[3], uAnchors[0], MC_BUFF_SRC_STRIDE, width, height, pBuf + 4);
|
MCHalfPelFilterAnchor (uAnchors[1], uAnchors[2], uAnchors[3], uAnchors[0], MC_BUFF_SRC_STRIDE, width, height, pBuf + 4);
|
||||||
sMcFunc.pfLumaHalfpelHor (&uSrcTest[4][4], MC_BUFF_SRC_STRIDE, uDstTest[0], MC_BUFF_DST_STRIDE, width + 1, height);
|
sMcFunc.pfLumaHalfpelHor (&uSrcTest[4][4], MC_BUFF_SRC_STRIDE, uDstTest[0], MC_BUFF_DST_STRIDE, width + 1, height);
|
||||||
|
|||||||
Reference in New Issue
Block a user