diff --git a/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj b/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj
index 4323b303..77438204 100644
--- a/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj
@@ -12,6 +12,7 @@
 		4C34067018C57D0400DFA14A /* memory_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4C34066918C57D0400DFA14A /* memory_neon.S */; };
 		4C34067118C57D0400DFA14A /* pixel_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4C34066A18C57D0400DFA14A /* pixel_neon.S */; };
 		4C34067218C57D0400DFA14A /* reconstruct_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4C34066B18C57D0400DFA14A /* reconstruct_neon.S */; };
+		4CB8F2B419235FC5005D6386 /* pixel_neon_aarch64.S in Sources */ = {isa = PBXBuildFile; fileRef = 4CB8F2B319235FC5005D6386 /* pixel_neon_aarch64.S */; };
 		4CE4431518B6FFA00017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE4431418B6FFA00017DF25 /* Foundation.framework */; };
 		4CE4432318B6FFA00017DF25 /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE4432218B6FFA00017DF25 /* XCTest.framework */; };
 		4CE4432418B6FFA00017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE4431418B6FFA00017DF25 /* Foundation.framework */; };
@@ -81,6 +82,7 @@
 		4C34066918C57D0400DFA14A /* memory_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = memory_neon.S; sourceTree = ""; };
 		4C34066A18C57D0400DFA14A /* pixel_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = pixel_neon.S; sourceTree = ""; };
 		4C34066B18C57D0400DFA14A /* reconstruct_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = reconstruct_neon.S; sourceTree = ""; };
+		4CB8F2B319235FC5005D6386 /* pixel_neon_aarch64.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = pixel_neon_aarch64.S; path = arm64/pixel_neon_aarch64.S; sourceTree = ""; };
 		4CDBFB9D18E5068D0025A767 /* wels_transpose_matrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = wels_transpose_matrix.h; sourceTree = ""; };
 		4CE4431118B6FFA00017DF25 /* libwelsenc.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libwelsenc.a; sourceTree = BUILT_PRODUCTS_DIR; };
 		4CE4431418B6FFA00017DF25 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
@@ -210,6 +212,14 @@
 			path = arm;
 			sourceTree = "";
 		};
+		4CB8F2B219235FAC005D6386 /* arm64 */ = {
+			isa = PBXGroup;
+			children = (
+				4CB8F2B319235FC5005D6386 /* pixel_neon_aarch64.S */,
+			);
+			name = arm64;
+			sourceTree = "";
+		};
 		4CE4430818B6FFA00017DF25 = {
 			isa = PBXGroup;
 			children = (
@@ -270,6 +280,7 @@
 		4CE446A118BC605B0017DF25 /* core */ = {
 			isa = PBXGroup;
 			children = (
+				4CB8F2B219235FAC005D6386 /* arm64 */,
 				4C34066418C57D0400DFA14A /* arm */,
 				4CE446A918BC605C0017DF25 /* inc */,
 				4CE446DC18BC605C0017DF25 /* src */,
@@ -507,6 +518,7 @@
 				4CE4471918BC605C0017DF25 /* memory_align.cpp in Sources */,
 				4CE4472418BC605C0017DF25 /* svc_enc_slice_segment.cpp in Sources */,
 				4CE4472318BC605C0017DF25 /* svc_base_layer_md.cpp in Sources */,
+				4CB8F2B419235FC5005D6386 /* pixel_neon_aarch64.S in Sources */,
 				4CE4471E18BC605C0017DF25 /* ratectl.cpp in Sources */,
 				4C34066D18C57D0400DFA14A /* intra_pred_neon.S in Sources */,
 				4CE4471C18BC605C0017DF25 /* picture_handle.cpp in Sources */,
@@ -648,6 +660,7 @@
 					"$(SRCROOT)/../../../../processing/interface",
 					"$(SRCROOT)/../../../../api/svc",
 					"$(SRCROOT)/../../../../common/arm",
+					"$(SRCROOT)/../../../../common/arm64",
 				);
 				IPHONEOS_DEPLOYMENT_TARGET = 6.1;
 				ONLY_ACTIVE_ARCH = NO;
@@ -686,6 +699,7 @@
 					"$(SRCROOT)/../../../../processing/interface",
 					"$(SRCROOT)/../../../../api/svc",
 					"$(SRCROOT)/../../../../common/arm",
+					"$(SRCROOT)/../../../../common/arm64",
 				);
 				IPHONEOS_DEPLOYMENT_TARGET = 6.1;
 				OTHER_LDFLAGS = "-ObjC";
diff --git a/codec/common/inc/sad_common.h b/codec/common/inc/sad_common.h
index f75a13ed..ef3e56f7 100644
--- a/codec/common/inc/sad_common.h
+++ b/codec/common/inc/sad_common.h
@@ -89,6 +89,19 @@ void WelsSampleSadFour4x4_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
 
 #endif
 
+#if defined (HAVE_NEON_AARCH64) // AArch64 NEON SAD routines from encoder/core/arm64/pixel_neon_aarch64.S
+int32_t WelsSampleSad4x4_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t);
+int32_t WelsSampleSad16x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t);
+int32_t WelsSampleSad16x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t);
+int32_t WelsSampleSad8x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t);
+int32_t WelsSampleSad8x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t);
+
+void WelsSampleSadFour16x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
+void WelsSampleSadFour16x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
+void WelsSampleSadFour8x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
+void WelsSampleSadFour8x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
+void WelsSampleSadFour4x4_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
+#endif
 #if defined(__cplusplus)
 }
 #endif//__cplusplus
diff --git a/codec/encoder/core/arm64/pixel_neon_aarch64.S b/codec/encoder/core/arm64/pixel_neon_aarch64.S
new file mode 100755
index 00000000..7a834405
--- /dev/null
+++ b/codec/encoder/core/arm64/pixel_neon_aarch64.S
@@ -0,0 +1,398 @@
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef HAVE_NEON_AARCH64
+.text
+#include "arm_arch64_common_macro.S"
+
+.macro CALC_AND_STORE_SAD /* add up the eight 16-bit lanes of v2 and leave the 32-bit total in w0 */
+    uaddlp  v2.4s, v2.8h
+    addp    v2.4s, v2.4s, v2.4s
+    addp    v2.2s, v2.2s, v2.2s
+    umov    w0, v2.s[0]
+.endm
+
+.macro CALC_AND_STORE_SAD_FOUR /* reduce v28, v29, v30, v31 to one total each and store them, in that order, as four 32-bit words at [x4] */
+    addp    v0.8h, v28.8h, v29.8h
+    addp    v1.8h, v30.8h, v31.8h
+    addp    v0.8h, v0.8h, v1.8h
+    addp    v0.8h, v0.8h, v0.8h /* lanes 0-3 now hold the four totals as unsigned 16-bit values */
+    eor     v1.8b, v1.8b, v1.8b
+    uaddl   v0.4s, v0.4h, v1.4h /* widen by zero-extension: a 16x16 total can reach 256*255 = 65280, which a signed widen would turn negative */
+    st1     {v0.4s}, [x4]
+.endm
+
+.macro LOAD_8X8_1 /* v0-v7 = eight 8-byte rows read from [x0], advancing x0 by x1 per row */
+    ld1     {v0.8b}, [x0], x1
+    ld1     {v1.8b}, [x0], x1
+    ld1     {v2.8b}, [x0], x1
+    ld1     {v3.8b}, [x0], x1
+    ld1     {v4.8b}, [x0], x1
+    ld1     {v5.8b}, [x0], x1
+    ld1     {v6.8b}, [x0], x1
+    ld1     {v7.8b}, [x0], x1
+.endm
+
+.macro LOAD_8X8_2 /* v16-v23 = eight 8-byte rows read through pointer register $0, advancing it by x3 per row */
+    ld1     {v16.8b}, [$0], x3
+    ld1     {v17.8b}, [$0], x3
+    ld1     {v18.8b}, [$0], x3
+    ld1     {v19.8b}, [$0], x3
+    ld1     {v20.8b}, [$0], x3
+    ld1     {v21.8b}, [$0], x3
+    ld1     {v22.8b}, [$0], x3
+    ld1     {v23.8b}, [$0], x3
+.endm
+
+.macro CALC_ABS_8X8_1 /* $0 = 16-bit accumulator; $1 = d (first op is uabdl, starts a new sum) or a (uabal, keeps accumulating); pairs v0-v7 with v16-v23 */
+    uab$1l  $0, v0.8b, v16.8b
+    uabal   $0, v1.8b, v17.8b
+    uabal   $0, v2.8b, v18.8b
+    uabal   $0, v3.8b, v19.8b
+    uabal   $0, v4.8b, v20.8b
+    uabal   $0, v5.8b, v21.8b
+    uabal   $0, v6.8b, v22.8b
+    uabal   $0, v7.8b, v23.8b
+.endm
+
+.macro CALC_ABS_8X8_2 /* accumulates into v29, pairing v0-v7 with v18-v25; $0 = d or a as in CALC_ABS_8X8_1 */
+    uab$0l  v29.8h, v0.8b, v18.8b
+    uabal   v29.8h, v1.8b, v19.8b
+    uabal   v29.8h, v2.8b, v20.8b
+    uabal   v29.8h, v3.8b, v21.8b
+    uabal   v29.8h, v4.8b, v22.8b
+    uabal   v29.8h, v5.8b, v23.8b
+    uabal   v29.8h, v6.8b, v24.8b
+    uabal   v29.8h, v7.8b, v25.8b
+.endm
+
+.macro LOAD_16X8_1 /* 16-byte-wide counterpart of LOAD_8X8_1 */
+    ld1     {v0.16b}, [x0], x1
+    ld1     {v1.16b}, [x0], x1
+    ld1     {v2.16b}, [x0], x1
+    ld1     {v3.16b}, [x0], x1
+    ld1     {v4.16b}, [x0], x1
+    ld1     {v5.16b}, [x0], x1
+    ld1     {v6.16b}, [x0], x1
+    ld1     {v7.16b}, [x0], x1
+.endm
+
+.macro LOAD_16X8_2 /* 16-byte-wide counterpart of LOAD_8X8_2 */
+    ld1     {v16.16b}, [$0], x3
+    ld1     {v17.16b}, [$0], x3
+    ld1     {v18.16b}, [$0], x3
+    ld1     {v19.16b}, [$0], x3
+    ld1     {v20.16b}, [$0], x3
+    ld1     {v21.16b}, [$0], x3
+    ld1     {v22.16b}, [$0], x3
+    ld1     {v23.16b}, [$0], x3
+.endm
+
+.macro CALC_ABS_16X8_1 /* as CALC_ABS_8X8_1, with uabal2 adding the upper eight bytes of each row into the same accumulator */
+    uab$1l  $0, v0.8b, v16.8b
+    uabal2  $0, v0.16b,v16.16b
+    uabal   $0, v1.8b, v17.8b
+    uabal2  $0, v1.16b,v17.16b
+    uabal   $0, v2.8b, v18.8b
+    uabal2  $0, v2.16b,v18.16b
+    uabal   $0, v3.8b, v19.8b
+    uabal2  $0, v3.16b,v19.16b
+    uabal   $0, v4.8b, v20.8b
+    uabal2  $0, v4.16b,v20.16b
+    uabal   $0, v5.8b, v21.8b
+    uabal2  $0, v5.16b,v21.16b
+    uabal   $0, v6.8b, v22.8b
+    uabal2  $0, v6.16b,v22.16b
+    uabal   $0, v7.8b, v23.8b
+    uabal2  $0, v7.16b,v23.16b
+.endm
+
+.macro CALC_ABS_16X8_2 /* as CALC_ABS_8X8_2, with uabal2 adding the upper eight bytes of each row into v29 */
+    uab$0l  v29.8h, v0.8b, v18.8b
+    uabal2  v29.8h, v0.16b,v18.16b
+    uabal   v29.8h, v1.8b, v19.8b
+    uabal2  v29.8h, v1.16b,v19.16b
+    uabal   v29.8h, v2.8b, v20.8b
+    uabal2  v29.8h, v2.16b,v20.16b
+    uabal   v29.8h, v3.8b, v21.8b
+    uabal2  v29.8h, v3.16b,v21.16b
+    uabal   v29.8h, v4.8b, v22.8b
+    uabal2  v29.8h, v4.16b,v22.16b
+    uabal   v29.8h, v5.8b, v23.8b
+    uabal2  v29.8h, v5.16b,v23.16b
+    uabal   v29.8h, v6.8b, v24.8b
+    uabal2  v29.8h, v6.16b,v24.16b
+    uabal   v29.8h, v7.8b, v25.8b
+    uabal2  v29.8h, v7.16b,v25.16b
+.endm
+
+WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSad4x4_AArch64_neon /* x0/w1 = first block and its stride, x2/w3 = second block and its stride; total returned in w0 */
+    sxtw    x1, w1 /* strides arrive as 32-bit ints; sign-extend them before using them as 64-bit post-increments */
+    sxtw    x3, w3
+    ld1     {v0.s}[0], [x0], x1
+    ld1     {v1.s}[0], [x2], x3
+    uabdl   v2.8h, v0.8b, v1.8b
+.rept 3
+    ld1     {v0.s}[0], [x0], x1
+    ld1     {v1.s}[0], [x2], x3
+    uabal   v2.8h, v0.8b, v1.8b
+.endr
+    uaddlp  v2.2s, v2.4h /* only the low four lanes come from the 4 bytes loaded per row, so only they are summed */
+    addp    v2.2s, v2.2s, v2.2s
+    umov    w0, v2.s[0]
+WELS_ASM_ARCH64_FUNC_END
+
+WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSad8x8_AArch64_neon
+    sxtw    x1, w1
+    sxtw    x3, w3
+    ld1     {v0.8b}, [x0], x1
+    ld1     {v1.8b}, [x2], x3
+    uabdl   v2.8h, v0.8b, v1.8b
+.rept 7
+    ld1     {v0.8b}, [x0], x1
+    ld1     {v1.8b}, [x2], x3
+    uabal   v2.8h, v0.8b, v1.8b
+.endr
+    CALC_AND_STORE_SAD
+WELS_ASM_ARCH64_FUNC_END
+
+WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSad8x16_AArch64_neon
+    sxtw    x1, w1
+    sxtw    x3, w3
+    ld1     {v0.8b}, [x0], x1
+    ld1     {v1.8b}, [x2], x3
+    uabdl   v2.8h, v0.8b, v1.8b
+.rept 15
+    ld1     {v0.8b}, [x0], x1
+    ld1     {v1.8b}, [x2], x3
+    uabal   v2.8h, v0.8b, v1.8b
+.endr
+    CALC_AND_STORE_SAD
+WELS_ASM_ARCH64_FUNC_END
+
+WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSad16x8_AArch64_neon
+    sxtw    x1, w1
+    sxtw    x3, w3
+    ld1     {v0.16b}, [x0], x1
+    ld1     {v1.16b}, [x2], x3
+    uabdl   v2.8h, v0.8b, v1.8b
+    uabal2  v2.8h, v0.16b, v1.16b
+.rept 7
+    ld1     {v0.16b}, [x0], x1
+    ld1     {v1.16b}, [x2], x3
+    uabal   v2.8h, v0.8b, v1.8b
+    uabal2  v2.8h, v0.16b, v1.16b
+.endr
+    CALC_AND_STORE_SAD
+WELS_ASM_ARCH64_FUNC_END
+
+WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSad16x16_AArch64_neon
+    sxtw    x1, w1
+    sxtw    x3, w3
+    ld1     {v0.16b}, [x0], x1
+    ld1     {v1.16b}, [x2], x3
+    uabdl   v2.8h, v0.8b, v1.8b
+    uabal2  v2.8h, v0.16b, v1.16b
+.rept 15
+    ld1     {v0.16b}, [x0], x1
+    ld1     {v1.16b}, [x2], x3
+    uabal   v2.8h, v0.8b, v1.8b
+    uabal2  v2.8h, v0.16b, v1.16b
+.endr
+    CALC_AND_STORE_SAD
+WELS_ASM_ARCH64_FUNC_END
+
+WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSadFour4x4_AArch64_neon /* x0/w1 = first block and stride, x2/w3 = second block and stride, x4 = four int32_t results */
+    sxtw    x1, w1
+    sxtw    x3, w3
+    ld1     {v0.s}[0], [x0], x1
+    ld1     {v0.s}[1], [x0], x1
+    ld1     {v1.s}[0], [x0], x1
+    ld1     {v1.s}[1], [x0]
+    sub     x0, x2, x3 /* start one row above the second block; six rows are read from here */
+    ld1     {v2.s}[0], [x0], x3
+    ld1     {v2.s}[1], [x0], x3
+    ld1     {v3.s}[0], [x0], x3
+    ld1     {v3.s}[1], [x0], x3
+    ld1     {v4.s}[0], [x0], x3
+    ld1     {v4.s}[1], [x0], x3
+
+    uabdl   v28.8h, v0.8b, v2.8b /* v28: second block shifted up one row */
+    uabal   v28.8h, v1.8b, v3.8b
+
+    uabdl   v29.8h, v0.8b, v3.8b /* v29: second block shifted down one row */
+    uabal   v29.8h, v1.8b, v4.8b
+
+    sub     x0, x2, #1 /* v30: second block shifted one byte to the left */
+    ld1     {v2.s}[0], [x0], x3
+    ld1     {v2.s}[1], [x0], x3
+    ld1     {v3.s}[0], [x0], x3
+    ld1     {v3.s}[1], [x0]
+    uabdl   v30.8h, v0.8b, v2.8b
+    uabal   v30.8h, v1.8b, v3.8b
+
+    add     x0, x2, #1 /* v31: second block shifted one byte to the right */
+    ld1     {v2.s}[0], [x0], x3
+    ld1     {v2.s}[1], [x0], x3
+    ld1     {v3.s}[0], [x0], x3
+    ld1     {v3.s}[1], [x0]
+    uabdl   v31.8h, v0.8b, v2.8b
+    uabal   v31.8h, v1.8b, v3.8b
+
+    CALC_AND_STORE_SAD_FOUR
+WELS_ASM_ARCH64_FUNC_END
+
+WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSadFour8x8_AArch64_neon
+    sxtw    x1, w1
+    sxtw    x3, w3
+    LOAD_8X8_1
+    sub     x0, x2, x3 /* x0 is free again after LOAD_8X8_1; reuse it to walk the second block from one row above */
+    LOAD_8X8_2 x0
+    ld1     {v24.8b}, [x0], x3 /* two further rows, needed by CALC_ABS_8X8_2 */
+    ld1     {v25.8b}, [x0]
+
+    CALC_ABS_8X8_1 v28.8h, d
+    CALC_ABS_8X8_2 d
+
+    sub     x0, x2, #1
+    LOAD_8X8_2 x0
+    CALC_ABS_8X8_1 v30.8h, d
+
+    add     x0, x2, #1
+    LOAD_8X8_2 x0
+    CALC_ABS_8X8_1 v31.8h, d
+
+    CALC_AND_STORE_SAD_FOUR
+WELS_ASM_ARCH64_FUNC_END
+
+WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSadFour8x16_AArch64_neon /* handled as two 8-row passes; x5, x6, x7 keep the three second-block pointers between passes */
+    sxtw    x1, w1
+    sxtw    x3, w3
+    LOAD_8X8_1
+    sub     x5, x2, x3
+    LOAD_8X8_2 x5
+    ld1     {v24.8b}, [x5], x3
+    ld1     {v25.8b}, [x5], x3
+
+    CALC_ABS_8X8_1 v28.8h, d
+    CALC_ABS_8X8_2 d
+
+    sub     x6, x2, #1
+    LOAD_8X8_2 x6
+    CALC_ABS_8X8_1 v30.8h, d
+
+    add     x7, x2, #1
+    LOAD_8X8_2 x7
+    CALC_ABS_8X8_1 v31.8h, d
+
+    LOAD_8X8_1
+    sub     x5, x5, x3 /* x5 advanced ten rows in the first pass; step back two so the second pass starts eight rows after the first */
+    sub     x5, x5, x3
+    LOAD_8X8_2 x5
+    ld1     {v24.8b}, [x5], x3
+    ld1     {v25.8b}, [x5]
+
+    CALC_ABS_8X8_1 v28.8h, a
+    CALC_ABS_8X8_2 a
+
+    LOAD_8X8_2 x6
+    CALC_ABS_8X8_1 v30.8h, a
+
+    LOAD_8X8_2 x7
+    CALC_ABS_8X8_1 v31.8h, a
+
+    CALC_AND_STORE_SAD_FOUR
+WELS_ASM_ARCH64_FUNC_END
+
+WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSadFour16x8_AArch64_neon
+    sxtw    x1, w1
+    sxtw    x3, w3
+    LOAD_16X8_1
+    sub     x0, x2, x3
+    LOAD_16X8_2 x0
+    ld1     {v24.16b}, [x0], x3
+    ld1     {v25.16b}, [x0]
+
+    CALC_ABS_16X8_1 v28.8h, d
+    CALC_ABS_16X8_2 d
+
+    sub     x0, x2, #1
+    LOAD_16X8_2 x0
+    CALC_ABS_16X8_1 v30.8h, d
+
+    add     x0, x2, #1
+    LOAD_16X8_2 x0
+    CALC_ABS_16X8_1 v31.8h, d
+
+    CALC_AND_STORE_SAD_FOUR
+WELS_ASM_ARCH64_FUNC_END
+
+WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSadFour16x16_AArch64_neon /* two 8-row passes as in the 8x16 routine; totals here can exceed 32767, hence the unsigned widen in CALC_AND_STORE_SAD_FOUR */
+    sxtw    x1, w1
+    sxtw    x3, w3
+
+    LOAD_16X8_1
+    sub     x5, x2, x3
+    LOAD_16X8_2 x5
+    ld1     {v24.16b}, [x5], x3
+    ld1     {v25.16b}, [x5], x3
+
+    CALC_ABS_16X8_1 v28.8h, d
+    CALC_ABS_16X8_2 d
+
+    sub     x6, x2, #1
+    LOAD_16X8_2 x6
+    CALC_ABS_16X8_1 v30.8h, d
+
+    add     x7, x2, #1
+    LOAD_16X8_2 x7
+    CALC_ABS_16X8_1 v31.8h, d
+
+    LOAD_16X8_1
+    sub     x5, x5, x3
+    sub     x5, x5, x3
+    LOAD_16X8_2 x5
+    ld1     {v24.16b}, [x5], x3
+    ld1     {v25.16b}, [x5]
+
+    CALC_ABS_16X8_1 v28.8h, a
+    CALC_ABS_16X8_2 a
+
+    LOAD_16X8_2 x6
+    CALC_ABS_16X8_1 v30.8h, a
+
+    LOAD_16X8_2 x7
+    CALC_ABS_16X8_1 v31.8h, a
+
+    CALC_AND_STORE_SAD_FOUR
+WELS_ASM_ARCH64_FUNC_END
+#endif
\ No newline at end of file
diff --git a/codec/encoder/core/src/sample.cpp b/codec/encoder/core/src/sample.cpp
index bcc3465c..a52b2bb6 100644
--- a/codec/encoder/core/src/sample.cpp
+++ b/codec/encoder/core/src/sample.cpp
@@ -413,6 +413,23 @@ void WelsInitSampleSadFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
     pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad = WelsIntra16x16Combined3Sad_neon;
   }
 #endif
+
+#if defined (HAVE_NEON_AARCH64)
+  if (uiCpuFlag & WELS_CPU_NEON) { // install the AArch64 NEON SAD and four-way SAD routines
+    pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4 ] = WelsSampleSad4x4_AArch64_neon;
+    pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_AArch64_neon;
+    pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8 ] = WelsSampleSad16x8_AArch64_neon;
+    pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16] = WelsSampleSad8x16_AArch64_neon;
+    pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] = WelsSampleSad8x8_AArch64_neon;
+
+    pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_AArch64_neon;
+    pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_AArch64_neon;
+    pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_AArch64_neon;
+    pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_AArch64_neon;
+    pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x4] = WelsSampleSadFour4x4_AArch64_neon;
+
+  }
+#endif
 }
 
 } // namespace WelsSVCEnc
diff --git a/codec/encoder/targets.mk b/codec/encoder/targets.mk
index 52d3324a..49730ce7 100644
--- a/codec/encoder/targets.mk
+++ b/codec/encoder/targets.mk
@@ -60,6 +60,13 @@ ENCODER_ASM_ARM_SRCS=\
 ENCODER_OBJS += $(ENCODER_ASM_ARM_SRCS:.S=.$(OBJ))
 endif
 
+ifeq ($(ASM_ARCH), arm64)
+ENCODER_ASM_ARM64_SRCS=\
+	$(ENCODER_SRCDIR)/core/arm64/pixel_neon_aarch64.S\
+
+ENCODER_OBJS += $(ENCODER_ASM_ARM64_SRCS:.S=.$(OBJ))
+endif
+
 OBJS += $(ENCODER_OBJS)
 $(ENCODER_SRCDIR)/%.$(OBJ): $(ENCODER_SRCDIR)/%.cpp
 	$(QUIET_CXX)$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c $(CXX_O) $<