diff --git a/build/gtest-targets.mk b/build/gtest-targets.mk index 620be2f1..fe711968 100644 --- a/build/gtest-targets.mk +++ b/build/gtest-targets.mk @@ -5,6 +5,7 @@ GTEST_CPP_SRCS=\ GTEST_OBJS += $(GTEST_CPP_SRCS:.cc=.$(OBJ)) OBJS += $(GTEST_OBJS) + $(GTEST_SRCDIR)/%.$(OBJ): $(GTEST_SRCDIR)/%.cc $(QUIET_CXX)$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(GTEST_CFLAGS) $(GTEST_INCLUDES) -c $(CXX_O) $< diff --git a/codec/build/iOS/common/common.xcodeproj/project.pbxproj b/codec/build/iOS/common/common.xcodeproj/project.pbxproj index b4401cad..c39c6258 100644 --- a/codec/build/iOS/common/common.xcodeproj/project.pbxproj +++ b/codec/build/iOS/common/common.xcodeproj/project.pbxproj @@ -25,6 +25,9 @@ F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */; }; F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5B8D82C190757290037849A /* mc_aarch64_neon.S */; }; F5BB0BB8196BB5960072D50D /* copy_mb_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */; }; + F791965419D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F791965319D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S */; }; + F791965619D3B8A600F60C6B /* intra_pred_common_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F791965519D3B8A600F60C6B /* intra_pred_common_neon.S */; }; + F791965919D3BE2200F60C6B /* intra_pred_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F791965819D3BE2200F60C6B /* intra_pred_common.cpp */; }; FAABAA1818E9354A00D4186F /* sad_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = FAABAA1718E9354A00D4186F /* sad_common.cpp */; }; /* End PBXBuildFile section */ @@ -74,6 +77,10 @@ F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = deblocking_aarch64_neon.S; path = arm64/deblocking_aarch64_neon.S; sourceTree = ""; }; F5B8D82C190757290037849A /* mc_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = mc_aarch64_neon.S; path = arm64/mc_aarch64_neon.S; sourceTree = ""; }; F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = copy_mb_aarch64_neon.S; path = arm64/copy_mb_aarch64_neon.S; sourceTree = ""; }; + F791965319D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = intra_pred_common_aarch64_neon.S; path = arm64/intra_pred_common_aarch64_neon.S; sourceTree = ""; }; + F791965519D3B8A600F60C6B /* intra_pred_common_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = intra_pred_common_neon.S; sourceTree = ""; }; + F791965719D3BA9300F60C6B /* intra_pred_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = intra_pred_common.h; sourceTree = ""; }; + F791965819D3BE2200F60C6B /* intra_pred_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = intra_pred_common.cpp; sourceTree = ""; }; FAABAA1618E9353F00D4186F /* sad_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sad_common.h; sourceTree = ""; }; FAABAA1718E9354A00D4186F /* sad_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sad_common.cpp; sourceTree = ""; }; /* End PBXFileReference section */ @@ -93,6 +100,7 @@ 4C3406B118D96EA600DFA14A /* arm */ = { isa = PBXGroup; children = ( + F791965519D3B8A600F60C6B /* intra_pred_common_neon.S */, 4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */, 4C3406B218D96EA600DFA14A /* arm_arch_common_macro.S */, 4C3406B318D96EA600DFA14A /* deblocking_neon.S */, @@ -105,6 +113,7 @@ 4C3406B618D96EA600DFA14A /* inc */ = { isa = PBXGroup; children = ( + F791965719D3BA9300F60C6B /* intra_pred_common.h */, F0B204F718FD23B6005DA23F /* copy_mb.h */, FAABAA1618E9353F00D4186F /* sad_common.h */, 4C3406B718D96EA600DFA14A /* cpu.h */, @@ -126,6 +135,7 @@ 4C3406C318D96EA600DFA14A /* src */ = { isa = PBXGroup; children = ( + F791965819D3BE2200F60C6B /* intra_pred_common.cpp */, 5BA8F2BF19603F5F00011CE4 /* common_tables.cpp */, F0B204F818FD23BF005DA23F /* copy_mb.cpp */, FAABAA1718E9354A00D4186F /* sad_common.cpp */, @@ -179,6 +189,7 @@ F556A81D1906669F00E156A8 /* arm64 */ = { isa = PBXGroup; children = ( + F791965319D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S */, F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */, F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */, F5B8D82C190757290037849A /* mc_aarch64_neon.S */, @@ -244,12 +255,15 @@ F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */, F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */, 4C3406CE18D96EA600DFA14A /* crt_util_safe_x.cpp in Sources */, + F791965919D3BE2200F60C6B /* intra_pred_common.cpp in Sources */, 4C3406CF18D96EA600DFA14A /* deblocking_common.cpp in Sources */, 5BA8F2C019603F5F00011CE4 /* common_tables.cpp in Sources */, + F791965419D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S in Sources */, 4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */, 4C3406CC18D96EA600DFA14A /* mc_neon.S in Sources */, F5BB0BB8196BB5960072D50D /* copy_mb_aarch64_neon.S in Sources */, 4C3406CB18D96EA600DFA14A /* expand_picture_neon.S in Sources */, + F791965619D3B8A600F60C6B /* intra_pred_common_neon.S in Sources */, 4CC61F0918FF6B4B00E56EAB /* copy_mb_neon.S in Sources */, 53C1C9BC193F0FB000404D8F /* expand_pic.cpp in Sources */, 4C3406CD18D96EA600DFA14A /* cpu.cpp in Sources */, diff --git a/codec/build/iOS/processing/processing.xcodeproj/project.pbxproj b/codec/build/iOS/processing/processing.xcodeproj/project.pbxproj index 71e490d3..b859521e 100644 --- a/codec/build/iOS/processing/processing.xcodeproj/project.pbxproj +++ b/codec/build/iOS/processing/processing.xcodeproj/project.pbxproj @@ -17,7 +17,6 @@ 549947E2196A3FB400BA3D87 /* pixel_sad_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 549947AE196A3FB400BA3D87 /* pixel_sad_neon.S */; }; 549947E3196A3FB400BA3D87 /* vaa_calc_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 549947AF196A3FB400BA3D87 /* vaa_calc_neon.S */; }; 549947E4196A3FB400BA3D87 /* BackgroundDetection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947B1196A3FB400BA3D87 /* BackgroundDetection.cpp */; }; - 549947E5196A3FB400BA3D87 /* common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947B4196A3FB400BA3D87 /* common.cpp */; }; 549947E6196A3FB400BA3D87 /* memory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947B6196A3FB400BA3D87 /* memory.cpp */; }; 549947E7196A3FB400BA3D87 /* WelsFrameWork.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947BB196A3FB400BA3D87 /* WelsFrameWork.cpp */; }; 549947E8196A3FB400BA3D87 /* WelsFrameWorkEx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947BD196A3FB400BA3D87 /* WelsFrameWorkEx.cpp */; }; @@ -34,6 +33,7 @@ 549947F3196A3FB400BA3D87 /* vaacalcfuncs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D8196A3FB400BA3D87 /* vaacalcfuncs.cpp */; }; 549947F4196A3FB400BA3D87 /* vaacalculation.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D9196A3FB400BA3D87 /* vaacalculation.cpp */; }; 6C749B78197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */; }; + F791965B19D3BF6B00F60C6B /* intra_pred_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F791965A19D3BF6B00F60C6B /* intra_pred_common.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXCopyFilesBuildPhase section */ @@ -64,7 +64,6 @@ 549947AF196A3FB400BA3D87 /* vaa_calc_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = vaa_calc_neon.S; sourceTree = ""; }; 549947B1196A3FB400BA3D87 /* BackgroundDetection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = BackgroundDetection.cpp; sourceTree = ""; }; 549947B2196A3FB400BA3D87 /* BackgroundDetection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BackgroundDetection.h; sourceTree = ""; }; - 549947B4196A3FB400BA3D87 /* common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = common.cpp; sourceTree = ""; }; 549947B5196A3FB400BA3D87 /* common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = common.h; sourceTree = ""; }; 549947B6196A3FB400BA3D87 /* memory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memory.cpp; sourceTree = ""; }; 549947B7196A3FB400BA3D87 /* memory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memory.h; sourceTree = ""; }; @@ -97,6 +96,7 @@ 549947D9196A3FB400BA3D87 /* vaacalculation.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = vaacalculation.cpp; sourceTree = ""; }; 549947DA196A3FB400BA3D87 /* vaacalculation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vaacalculation.h; sourceTree = ""; }; 6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = adaptive_quantization_aarch64_neon.S; path = arm64/adaptive_quantization_aarch64_neon.S; sourceTree = ""; }; + F791965A19D3BF6B00F60C6B /* intra_pred_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = intra_pred_common.cpp; path = ../../../common/src/intra_pred_common.cpp; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -210,7 +210,7 @@ 549947B3196A3FB400BA3D87 /* common */ = { isa = PBXGroup; children = ( - 549947B4196A3FB400BA3D87 /* common.cpp */, + F791965A19D3BF6B00F60C6B /* intra_pred_common.cpp */, 549947B5196A3FB400BA3D87 /* common.h */, 549947B6196A3FB400BA3D87 /* memory.cpp */, 549947B7196A3FB400BA3D87 /* memory.h */, @@ -351,7 +351,6 @@ 549947E9196A3FB400BA3D87 /* ComplexityAnalysis.cpp in Sources */, 549947E3196A3FB400BA3D87 /* vaa_calc_neon.S in Sources */, 549947EE196A3FB400BA3D87 /* imagerotate.cpp in Sources */, - 549947E5196A3FB400BA3D87 /* common.cpp in Sources */, 549947EA196A3FB400BA3D87 /* denoise.cpp in Sources */, 549947E7196A3FB400BA3D87 /* WelsFrameWork.cpp in Sources */, 549947F1196A3FB400BA3D87 /* ScrollDetection.cpp in Sources */, @@ -367,6 +366,7 @@ 4CC6094F197E009D00BE8B8B /* down_sample_aarch64_neon.S in Sources */, 4CC6095A1980F34F00BE8B8B /* vaa_calc_aarch64_neon.S in Sources */, 549947F2196A3FB400BA3D87 /* ScrollDetectionFuncs.cpp in Sources */, + F791965B19D3BF6B00F60C6B /* intra_pred_common.cpp in Sources */, 549947EF196A3FB400BA3D87 /* imagerotatefuncs.cpp in Sources */, 549947DF196A3FB400BA3D87 /* AdaptiveQuantization.cpp in Sources */, 549947EC196A3FB400BA3D87 /* downsample.cpp in Sources */, diff --git a/codec/build/win32/enc/WelsEncCore.vcproj b/codec/build/win32/enc/WelsEncCore.vcproj index 051bc8db..86199a08 100644 --- a/codec/build/win32/enc/WelsEncCore.vcproj +++ b/codec/build/win32/enc/WelsEncCore.vcproj @@ -381,6 +381,10 @@ RelativePath="..\..\..\encoder\core\src\get_intra_predictor.cpp" > + + @@ -959,6 +963,46 @@ /> + + + + + + + + + + + + + + diff --git a/codec/common/arm/intra_pred_common_neon.S b/codec/common/arm/intra_pred_common_neon.S new file mode 100644 index 00000000..c730520e --- /dev/null +++ b/codec/common/arm/intra_pred_common_neon.S @@ -0,0 +1,83 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + + +#ifdef HAVE_NEON +.text +#include "arm_arch_common_macro.S" + +WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredV_neon + //Get the top line data to 'q0' + sub r3, r1, r2 + vldm r3, {d0, d1} + + //mov r2, #16 + mov r3, #4 + //Set the top line to the each line of MB(16*16) +loop_0_get_i16x16_luma_pred_v: + vst1.8 {d0,d1}, [r0]! + vst1.8 {d0,d1}, [r0]! + vst1.8 {d0,d1}, [r0]! + vst1.8 {d0,d1}, [r0]! + subs r3, #1 + bne loop_0_get_i16x16_luma_pred_v +WELS_ASM_FUNC_END + + +WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredH_neon + //stmdb sp!, {r4, lr} + sub r1, r1, #1 + mov r3, #4 +loop_0_get_i16x16_luma_pred_h: + //Get one byte data from left side + vld1.8 {d0[],d1[]}, [r1], r2 + vld1.8 {d2[],d3[]}, [r1], r2 + vld1.8 {d4[],d5[]}, [r1], r2 + vld1.8 {d6[],d7[]}, [r1], r2 + + //Set the line of MB using the left side byte data + vst1.8 {d0,d1}, [r0]! + //add r0, #16 + vst1.8 {d2,d3}, [r0]! + //add r0, #16 + vst1.8 {d4,d5}, [r0]! + //add r0, #16 + vst1.8 {d6,d7}, [r0]! + //add r0, #16 + + subs r3, #1 + bne loop_0_get_i16x16_luma_pred_h + +WELS_ASM_FUNC_END + + +#endif diff --git a/codec/common/arm64/intra_pred_common_aarch64_neon.S b/codec/common/arm64/intra_pred_common_aarch64_neon.S new file mode 100644 index 00000000..7568e0cb --- /dev/null +++ b/codec/common/arm64/intra_pred_common_aarch64_neon.S @@ -0,0 +1,55 @@ +/*! + * \copy + * Copyright (c) 2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifdef HAVE_NEON_AARCH64 +.text +#include "arm_arch64_common_macro.S" + +//for Luma 16x16 +WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredV_AArch64_neon + sub x3, x1, x2 + ld1 {v0.16b}, [x3] +.rept 16 + st1 {v0.16b}, [x0], 16 +.endr +WELS_ASM_AARCH64_FUNC_END + +WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredH_AArch64_neon + sub x3, x1, #1 +.rept 16 + ld1r {v0.16b}, [x3], x2 + st1 {v0.16b}, [x0], 16 +.endr +WELS_ASM_AARCH64_FUNC_END + +#endif + diff --git a/codec/common/inc/intra_pred_common.h b/codec/common/inc/intra_pred_common.h new file mode 100644 index 00000000..6f4461d9 --- /dev/null +++ b/codec/common/inc/intra_pred_common.h @@ -0,0 +1,76 @@ +/*! + * \copy + * Copyright (c) 2009-2013, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file intra_pred_common.h + * + * \brief interfaces for intra predictor about 16x16. + * + * \date 4/2/2014 Created + * + ************************************************************************************* + */ + +#ifndef INTRA_PRED_COMMON_H +#define INTRA_PRED_COMMON_H + +#include "typedefs.h" + + +void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + + +#if defined(__cplusplus) +extern "C" { +#endif//__cplusplus + +#if defined(X86_ASM) +//for intra-prediction ASM functions +void WelsI16x16LumaPredV_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredH_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +#endif//X86_ASM + +#if defined(HAVE_NEON) +void WelsI16x16LumaPredV_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredH_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +#endif//HAVE_NEON + +#if defined(HAVE_NEON_AARCH64) +void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +#endif//HAVE_NEON_AARCH64 +#if defined(__cplusplus) +} +#endif//__cplusplus +#endif// + + + diff --git a/codec/processing/src/common/common.cpp b/codec/common/src/intra_pred_common.cpp similarity index 83% rename from codec/processing/src/common/common.cpp rename to codec/common/src/intra_pred_common.cpp index 6327b15d..2a28cf60 100644 --- a/codec/processing/src/common/common.cpp +++ b/codec/common/src/intra_pred_common.cpp @@ -1,6 +1,6 @@ /*! * \copy - * Copyright (c) 2013, Cisco Systems + * Copyright (c) 2009-2013, Cisco Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,12 +28,21 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * + * + * \file get_intra_predictor.c + * + * \brief implementation for get intra predictor about 16x16, 4x4, chroma. + * + * \date 4/2/2009 Created + * 9/14/2009 C level based optimization with high performance gained. + * [const, using ST32/ST64 to replace memset, memcpy and memmove etc.] + * + ************************************************************************************* */ - -#include "common.h" #include "ls_defines.h" +#include "cpu_core.h" +#include "intra_pred_common.h" -WELSVP_NAMESPACE_BEGIN void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { uint8_t i = 15; @@ -66,4 +75,3 @@ void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStrid } while (i-- > 0); } -WELSVP_NAMESPACE_END diff --git a/codec/common/targets.mk b/codec/common/targets.mk index 296d3915..507455ab 100644 --- a/codec/common/targets.mk +++ b/codec/common/targets.mk @@ -6,6 +6,7 @@ COMMON_CPP_SRCS=\ $(COMMON_SRCDIR)/src/crt_util_safe_x.cpp\ $(COMMON_SRCDIR)/src/deblocking_common.cpp\ $(COMMON_SRCDIR)/src/expand_pic.cpp\ + $(COMMON_SRCDIR)/src/intra_pred_common.cpp\ $(COMMON_SRCDIR)/src/sad_common.cpp\ $(COMMON_SRCDIR)/src/utils.cpp\ $(COMMON_SRCDIR)/src/welsCodecTrace.cpp\ @@ -17,6 +18,7 @@ COMMON_ASM_SRCS=\ $(COMMON_SRCDIR)/x86/cpuid.asm\ $(COMMON_SRCDIR)/x86/deblock.asm\ $(COMMON_SRCDIR)/x86/expand_picture.asm\ + $(COMMON_SRCDIR)/x86/intra_pred_com.asm\ $(COMMON_SRCDIR)/x86/mb_copy.asm\ $(COMMON_SRCDIR)/x86/mc_chroma.asm\ $(COMMON_SRCDIR)/x86/mc_luma.asm\ @@ -33,6 +35,7 @@ COMMON_ASM_ARM_SRCS=\ $(COMMON_SRCDIR)/arm/copy_mb_neon.S\ $(COMMON_SRCDIR)/arm/deblocking_neon.S\ $(COMMON_SRCDIR)/arm/expand_picture_neon.S\ + $(COMMON_SRCDIR)/arm/intra_pred_common_neon.S\ $(COMMON_SRCDIR)/arm/mc_neon.S\ COMMON_OBJSARM += $(COMMON_ASM_ARM_SRCS:.S=.$(OBJ)) @@ -45,6 +48,7 @@ COMMON_ASM_ARM64_SRCS=\ $(COMMON_SRCDIR)/arm64/copy_mb_aarch64_neon.S\ $(COMMON_SRCDIR)/arm64/deblocking_aarch64_neon.S\ $(COMMON_SRCDIR)/arm64/expand_picture_aarch64_neon.S\ + $(COMMON_SRCDIR)/arm64/intra_pred_common_aarch64_neon.S\ $(COMMON_SRCDIR)/arm64/mc_aarch64_neon.S\ COMMON_OBJSARM64 += $(COMMON_ASM_ARM64_SRCS:.S=.$(OBJ)) diff --git a/codec/common/x86/intra_pred_com.asm b/codec/common/x86/intra_pred_com.asm new file mode 100644 index 00000000..b1172cde --- /dev/null +++ b/codec/common/x86/intra_pred_com.asm @@ -0,0 +1,117 @@ +;*! +;* \copy +;* Copyright (c) 2009-2013, Cisco Systems +;* All rights reserved. +;* +;* Redistribution and use in source and binary forms, with or without +;* modification, are permitted provided that the following conditions +;* are met: +;* +;* * Redistributions of source code must retain the above copyright +;* notice, this list of conditions and the following disclaimer. +;* +;* * Redistributions in binary form must reproduce the above copyright +;* notice, this list of conditions and the following disclaimer in +;* the documentation and/or other materials provided with the +;* distribution. +;* +;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;* POSSIBILITY OF SUCH DAMAGE. +;* +;* +;* intra_pred_common.asm +;* +;* Abstract +;* sse2 function for intra predict operations +;* +;* History +;* 18/09/2009 Created +;* +;* +;*************************************************************************/ +%include "asm_inc.asm" + +;*********************************************************************** +; Local Data (Read Only) +;*********************************************************************** + +SECTION .rodata align=16 + +;*********************************************************************** +; void WelsI16x16LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride); +;*********************************************************************** + +%macro SSE2_PRED_H_16X16_ONE_LINE 0 + add r0, 16 + add r1, r2 + movzx r3, byte [r1] + SSE2_Copy16Times xmm0, r3d + movdqa [r0], xmm0 +%endmacro + +WELS_EXTERN WelsI16x16LumaPredH_sse2 + push r3 + %assign push_num 1 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + dec r1 + movzx r3, byte [r1] + SSE2_Copy16Times xmm0, r3d + movdqa [r0], xmm0 + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + SSE2_PRED_H_16X16_ONE_LINE + pop r3 + ret + +;*********************************************************************** +; void WelsI16x16LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride); +;*********************************************************************** +WELS_EXTERN WelsI16x16LumaPredV_sse2 + %assign push_num 0 + LOAD_3_PARA + SIGN_EXTENSION r2, r2d + sub r1, r2 + movdqa xmm0, [r1] + + movdqa [r0], xmm0 + movdqa [r0+10h], xmm0 + movdqa [r0+20h], xmm0 + movdqa [r0+30h], xmm0 + movdqa [r0+40h], xmm0 + movdqa [r0+50h], xmm0 + movdqa [r0+60h], xmm0 + movdqa [r0+70h], xmm0 + movdqa [r0+80h], xmm0 + movdqa [r0+90h], xmm0 + movdqa [r0+160], xmm0 + movdqa [r0+176], xmm0 + movdqa [r0+192], xmm0 + movdqa [r0+208], xmm0 + movdqa [r0+224], xmm0 + movdqa [r0+240], xmm0 + + ret + diff --git a/codec/encoder/core/arm/intra_pred_neon.S b/codec/encoder/core/arm/intra_pred_neon.S index 1697fde2..ca2475ad 100644 --- a/codec/encoder/core/arm/intra_pred_neon.S +++ b/codec/encoder/core/arm/intra_pred_neon.S @@ -62,51 +62,6 @@ #endif -WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredV_neon - //Get the top line data to 'q0' - sub r3, r1, r2 - vldm r3, {d0, d1} - - //mov r2, #16 - mov r3, #4 - //Set the top line to the each line of MB(16*16) -loop_0_get_i16x16_luma_pred_v: - vst1.8 {d0,d1}, [r0]! - vst1.8 {d0,d1}, [r0]! - vst1.8 {d0,d1}, [r0]! - vst1.8 {d0,d1}, [r0]! - subs r3, #1 - bne loop_0_get_i16x16_luma_pred_v -WELS_ASM_FUNC_END - - -WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredH_neon - //stmdb sp!, {r4, lr} - sub r1, r1, #1 - mov r3, #4 -loop_0_get_i16x16_luma_pred_h: - //Get one byte data from left side - vld1.8 {d0[],d1[]}, [r1], r2 - vld1.8 {d2[],d3[]}, [r1], r2 - vld1.8 {d4[],d5[]}, [r1], r2 - vld1.8 {d6[],d7[]}, [r1], r2 - - //Set the line of MB using the left side byte data - vst1.8 {d0,d1}, [r0]! - //add r0, #16 - vst1.8 {d2,d3}, [r0]! - //add r0, #16 - vst1.8 {d4,d5}, [r0]! - //add r0, #16 - vst1.8 {d6,d7}, [r0]! - //add r0, #16 - - subs r3, #1 - bne loop_0_get_i16x16_luma_pred_h - -WELS_ASM_FUNC_END - - WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredDc_neon //stmdb sp!, { r2-r5, lr} //Get the left vertical line data diff --git a/codec/encoder/core/arm64/intra_pred_aarch64_neon.S b/codec/encoder/core/arm64/intra_pred_aarch64_neon.S index 2a430217..ba8ddd95 100644 --- a/codec/encoder/core/arm64/intra_pred_aarch64_neon.S +++ b/codec/encoder/core/arm64/intra_pred_aarch64_neon.S @@ -349,23 +349,6 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsIChromaPredPlane_AArch64_neon .endr WELS_ASM_AARCH64_FUNC_END -//for Luma 16x16 -WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredV_AArch64_neon - sub x3, x1, x2 - ld1 {v0.16b}, [x3] -.rept 16 - st1 {v0.16b}, [x0], 16 -.endr -WELS_ASM_AARCH64_FUNC_END - -WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredH_AArch64_neon - sub x3, x1, #1 -.rept 16 - ld1r {v0.16b}, [x3], x2 - st1 {v0.16b}, [x0], 16 -.endr -WELS_ASM_AARCH64_FUNC_END - WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredDc_AArch64_neon sub x3, x1, x2 sub x4, x1, #1 diff --git a/codec/encoder/core/inc/get_intra_predictor.h b/codec/encoder/core/inc/get_intra_predictor.h index 2a655890..b789fe8a 100644 --- a/codec/encoder/core/inc/get_intra_predictor.h +++ b/codec/encoder/core/inc/get_intra_predictor.h @@ -74,8 +74,6 @@ void WelsIChromaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStrid void WelsI16x16ChromaPredVer (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); void WelsI16x16ChromaPredHor (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); -void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); -void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); void WelsI16x16LumaPredPlane_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); void WelsI16x16LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); void WelsI16x16LumaPredDcLeft_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); @@ -94,8 +92,6 @@ void WelsFillingPred8x2to16_sse2 (uint8_t* pPred, uint8_t* pValue); void WelsFillingPred1to16_sse2 (uint8_t* pPred, const uint8_t kuiValue); //for intra-prediction ASM functions -void WelsI16x16LumaPredV_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); -void WelsI16x16LumaPredH_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); void WelsI16x16LumaPredDc_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); void WelsI16x16LumaPredPlane_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); @@ -116,8 +112,6 @@ void WelsI4x4LumaPredHU_mmx (uint8_t* pPred, uint8_t* pRef, const int32_t kiStri #endif//X86_ASM #if defined(HAVE_NEON) -void WelsI16x16LumaPredV_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); -void WelsI16x16LumaPredH_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); void WelsI16x16LumaPredDc_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); void WelsI16x16LumaPredPlane_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); @@ -137,8 +131,6 @@ void WelsIChromaPredPlane_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiS #endif//HAVE_NEON #if defined(HAVE_NEON_AARCH64) -void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); -void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); void WelsI16x16LumaPredDc_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); void WelsI16x16LumaPredPlane_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); void WelsI16x16LumaPredDcTop_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); diff --git a/codec/encoder/core/src/get_intra_predictor.cpp b/codec/encoder/core/src/get_intra_predictor.cpp index 5b9679bc..702cb737 100644 --- a/codec/encoder/core/src/get_intra_predictor.cpp +++ b/codec/encoder/core/src/get_intra_predictor.cpp @@ -41,6 +41,7 @@ */ #include "ls_defines.h" #include "cpu_core.h" +#include "intra_pred_common.h" #include "get_intra_predictor.h" namespace WelsEnc { @@ -538,37 +539,6 @@ void WelsIChromaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStrid } -void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { - uint8_t i = 15; - const int8_t* kpSrc = (int8_t*)&pRef[-kiStride]; - const uint64_t kuiT1 = LD64 (kpSrc); - const uint64_t kuiT2 = LD64 (kpSrc + 8); - uint8_t* pDst = pPred; - - do { - ST64 (pDst , kuiT1); - ST64 (pDst + 8, kuiT2); - pDst += 16; - } while (i-- > 0); -} - -void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { - int32_t iStridex15 = (kiStride << 4) - kiStride; - int32_t iPredStride = 16; - int32_t iPredStridex15 = 240; //(iPredStride<<4)-iPredStride; - uint8_t i = 15; - - do { - const uint8_t kuiSrc8 = pRef[iStridex15 - 1]; - const uint64_t kuiV64 = (uint64_t) (0x0101010101010101ULL * kuiSrc8); - ST64 (&pPred[iPredStridex15], kuiV64); - ST64 (&pPred[iPredStridex15 + 8], kuiV64); - - iStridex15 -= kiStride; - iPredStridex15 -= iPredStride; - } while (i-- > 0); -} - void WelsI16x16LumaPredPlane_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { int32_t iLTshift = 0, iTopshift = 0, iLeftshift = 0, iTopSum = 0, iLeftSum = 0; int32_t i, j; diff --git a/codec/encoder/core/src/sample.cpp b/codec/encoder/core/src/sample.cpp index e6005e23..c88fc3c5 100644 --- a/codec/encoder/core/src/sample.cpp +++ b/codec/encoder/core/src/sample.cpp @@ -40,7 +40,7 @@ #include "sample.h" #include "sad_common.h" - +#include "intra_pred_common.h" #include "mc.h" #include "cpu_core.h" @@ -250,8 +250,8 @@ int32_t WelsSampleSadIntra8x8Combined3_c (uint8_t* pDecCb, int32_t iDecStride, u } extern void WelsI16x16LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); -extern void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); -extern void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); +//extern void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); +//extern void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride); int32_t WelsSampleSatdIntra16x16Combined3_c (uint8_t* pDec, int32_t iDecStride, uint8_t* pEnc, int32_t iEncStride, int32_t* pBestMode, int32_t iLambda, uint8_t* pDst) { diff --git a/codec/encoder/core/x86/intra_pred.asm b/codec/encoder/core/x86/intra_pred.asm index acbc265d..31526831 100644 --- a/codec/encoder/core/x86/intra_pred.asm +++ b/codec/encoder/core/x86/intra_pred.asm @@ -306,74 +306,6 @@ get_i16x16_luma_pred_plane_sse2_1: pop r3 ret -;*********************************************************************** -; void WelsI16x16LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride); -;*********************************************************************** - -%macro SSE2_PRED_H_16X16_ONE_LINE 0 - add r0, 16 - add r1, r2 - movzx r3, byte [r1] - SSE2_Copy16Times xmm0, r3d - movdqa [r0], xmm0 -%endmacro - -WELS_EXTERN WelsI16x16LumaPredH_sse2 - push r3 - %assign push_num 1 - LOAD_3_PARA - SIGN_EXTENSION r2, r2d - dec r1 - movzx r3, byte [r1] - SSE2_Copy16Times xmm0, r3d - movdqa [r0], xmm0 - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - SSE2_PRED_H_16X16_ONE_LINE - pop r3 - ret - -;*********************************************************************** -; void WelsI16x16LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride); -;*********************************************************************** -WELS_EXTERN WelsI16x16LumaPredV_sse2 - %assign push_num 0 - LOAD_3_PARA - SIGN_EXTENSION r2, r2d - sub r1, r2 - movdqa xmm0, [r1] - - movdqa [r0], xmm0 - movdqa [r0+10h], xmm0 - movdqa [r0+20h], xmm0 - movdqa [r0+30h], xmm0 - movdqa [r0+40h], xmm0 - movdqa [r0+50h], xmm0 - movdqa [r0+60h], xmm0 - movdqa [r0+70h], xmm0 - movdqa [r0+80h], xmm0 - movdqa [r0+90h], xmm0 - movdqa [r0+160], xmm0 - movdqa [r0+176], xmm0 - movdqa [r0+192], xmm0 - movdqa [r0+208], xmm0 - movdqa [r0+224], xmm0 - movdqa [r0+240], xmm0 - - ret - ;*********************************************************************** ; void WelsIChromaPredPlane_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride); ;*********************************************************************** diff --git a/codec/processing/build/win32/WelsVP.vcproj b/codec/processing/build/win32/WelsVP.vcproj index 94c940d4..fba42fbe 100644 --- a/codec/processing/build/win32/WelsVP.vcproj +++ b/codec/processing/build/win32/WelsVP.vcproj @@ -358,11 +358,11 @@ UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}" > + + @@ -573,6 +577,46 @@ /> + + + + + + + + + + + + + + diff --git a/codec/processing/src/common/common.h b/codec/processing/src/common/common.h index f5287e95..2268cec4 100644 --- a/codec/processing/src/common/common.h +++ b/codec/processing/src/common/common.h @@ -38,14 +38,24 @@ * */ -#ifndef WELSVP_SCENECHANGEDETECTIONCOMMON_H -#define WELSVP_SCENECHANGEDETECTIONCOMMON_H +#ifndef WELSVP_COMMON_H +#define WELSVP_COMMON_H #include "util.h" #include "memory.h" #include "WelsFrameWork.h" #include "IWelsVP.h" #include "sad_common.h" +#include "intra_pred_common.h" + + + +typedef void (GetIntraPred) (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); + +typedef GetIntraPred* GetIntraPredPtr; + +GetIntraPred WelsI16x16LumaPredV_c; +GetIntraPred WelsI16x16LumaPredH_c; WELSVP_NAMESPACE_BEGIN @@ -56,12 +66,6 @@ typedef SadFunc* SadFuncPtr; typedef int32_t (Sad16x16Func) (uint8_t* pSrcY, int32_t iSrcStrideY, uint8_t* pRefY, int32_t iRefStrideY); typedef Sad16x16Func* PSad16x16Func; -typedef void (GetIntraPred) (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); - -typedef GetIntraPred* GetIntraPredPtr; - -GetIntraPred WelsI16x16LumaPredV_c; -GetIntraPred WelsI16x16LumaPredH_c; #ifdef HAVE_NEON WELSVP_EXTERN_C_BEGIN diff --git a/codec/processing/src/complexityanalysis/ComplexityAnalysis.cpp b/codec/processing/src/complexityanalysis/ComplexityAnalysis.cpp index ed594e19..12cc842d 100644 --- a/codec/processing/src/complexityanalysis/ComplexityAnalysis.cpp +++ b/codec/processing/src/complexityanalysis/ComplexityAnalysis.cpp @@ -33,10 +33,10 @@ #include "ComplexityAnalysis.h" #include "cpu.h" #include "macros.h" +#include "intra_pred_common.h" WELSVP_NAMESPACE_BEGIN - /////////////////////////////////////////////////////////////////////////////////////////////////////////////// CComplexityAnalysis::CComplexityAnalysis (int32_t iCpuFlag) { @@ -280,8 +280,29 @@ CComplexityAnalysisScreen::CComplexityAnalysisScreen (int32_t iCpuFlag) { #ifdef X86_ASM if (iCpuFlag & WELS_CPU_SSE2) { m_pSadFunc = WelsSampleSad16x16_sse2; + m_pIntraFunc[0] = WelsI16x16LumaPredV_sse2; + m_pIntraFunc[1] = WelsI16x16LumaPredH_sse2; + } #endif + +#if defined (HAVE_NEON) + if (iCpuFlag & WELS_CPU_NEON) { + m_pSadFunc = WelsSampleSad16x16_neon; + m_pIntraFunc[0] = WelsI16x16LumaPredV_neon; + m_pIntraFunc[1] = WelsI16x16LumaPredH_neon; + + } +#endif + +#if defined (HAVE_NEON_AARCH64) + if (iCpuFlag & WELS_CPU_NEON) { + m_pSadFunc = WelsSampleSad16x16_AArch64_neon; + m_pIntraFunc[0] = WelsI16x16LumaPredV_AArch64_neon; + m_pIntraFunc[1] = WelsI16x16LumaPredH_AArch64_neon; + } +#endif + } CComplexityAnalysisScreen::~CComplexityAnalysisScreen() { diff --git a/codec/processing/targets.mk b/codec/processing/targets.mk index 0fab677e..b14a6325 100644 --- a/codec/processing/targets.mk +++ b/codec/processing/targets.mk @@ -2,7 +2,6 @@ PROCESSING_SRCDIR=codec/processing PROCESSING_CPP_SRCS=\ $(PROCESSING_SRCDIR)/src/adaptivequantization/AdaptiveQuantization.cpp\ $(PROCESSING_SRCDIR)/src/backgrounddetection/BackgroundDetection.cpp\ - $(PROCESSING_SRCDIR)/src/common/common.cpp\ $(PROCESSING_SRCDIR)/src/common/memory.cpp\ $(PROCESSING_SRCDIR)/src/common/WelsFrameWork.cpp\ $(PROCESSING_SRCDIR)/src/common/WelsFrameWorkEx.cpp\