From dd340b7fe75dabd76a559902be91493d56d8d128 Mon Sep 17 00:00:00 2001 From: Karina Date: Thu, 14 Apr 2016 14:49:11 +0800 Subject: [PATCH] modify neon comment --- codec/common/arm64/copy_mb_aarch64_neon.S | 2 +- .../arm64/expand_picture_aarch64_neon.S | 6 +- .../arm64/intra_pred_common_aarch64_neon.S | 4 +- codec/common/arm64/mc_aarch64_neon.S | 150 +++++++++--------- .../core/arm64/intra_pred_aarch64_neon.S | 2 +- .../arm64/intra_pred_sad_3_opt_aarch64_neon.S | 10 +- .../encoder/core/arm64/memory_aarch64_neon.S | 2 +- .../core/arm64/reconstruct_aarch64_neon.S | 10 +- .../svc_motion_estimation_aarch64_neon.S | 10 +- 9 files changed, 98 insertions(+), 98 deletions(-) diff --git a/codec/common/arm64/copy_mb_aarch64_neon.S b/codec/common/arm64/copy_mb_aarch64_neon.S index 5c56bad7..4d9f1975 100644 --- a/codec/common/arm64/copy_mb_aarch64_neon.S +++ b/codec/common/arm64/copy_mb_aarch64_neon.S @@ -105,7 +105,7 @@ // } .endm -;void WelsCopy8x8_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +//void WelsCopy8x8_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy8x8_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 diff --git a/codec/common/arm64/expand_picture_aarch64_neon.S b/codec/common/arm64/expand_picture_aarch64_neon.S index 880f430d..933de27f 100644 --- a/codec/common/arm64/expand_picture_aarch64_neon.S +++ b/codec/common/arm64/expand_picture_aarch64_neon.S @@ -32,7 +32,7 @@ #ifdef HAVE_NEON_AARCH64 #include "arm_arch64_common_macro.S" -;void ExpandPictureLuma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH); +//void ExpandPictureLuma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH); WELS_ASM_AARCH64_FUNC_BEGIN ExpandPictureLuma_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x2,w2 @@ -76,8 +76,8 @@ _expand_picture_luma_loop1: cbnz x2, _expand_picture_luma_loop0 WELS_ASM_AARCH64_FUNC_END -;void ExpandPictureChroma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, -; const int32_t kiPicH); +//void ExpandPictureChroma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, +// const int32_t kiPicH); WELS_ASM_AARCH64_FUNC_BEGIN ExpandPictureChroma_AArch64_neon //Save the dst SIGN_EXTENSION x1,w1 diff --git a/codec/common/arm64/intra_pred_common_aarch64_neon.S b/codec/common/arm64/intra_pred_common_aarch64_neon.S index 0eaca9d9..c18c2d46 100644 --- a/codec/common/arm64/intra_pred_common_aarch64_neon.S +++ b/codec/common/arm64/intra_pred_common_aarch64_neon.S @@ -34,7 +34,7 @@ #include "arm_arch64_common_macro.S" //for Luma 16x16 -;void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +//void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredV_AArch64_neon SIGN_EXTENSION x2,w2 sub x3, x1, x2 @@ -44,7 +44,7 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredV_AArch64_neon .endr WELS_ASM_AARCH64_FUNC_END -;void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +//void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredH_AArch64_neon SIGN_EXTENSION x2,w2 sub x3, x1, #1 diff --git a/codec/common/arm64/mc_aarch64_neon.S b/codec/common/arm64/mc_aarch64_neon.S index a1f48a9e..e4e43f99 100644 --- a/codec/common/arm64/mc_aarch64_neon.S +++ b/codec/common/arm64/mc_aarch64_neon.S @@ -315,7 +315,7 @@ w16_h_mc_luma_loop: cbnz x4, w16_h_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer20WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,int32_t iHeight); +//void McHorVer20WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer20WidthEq8_AArch64_neon sub x0, x0, #2 stp d8,d9, [sp,#-16]! @@ -373,8 +373,8 @@ w8_h_mc_luma_loop: ldp d8,d9,[sp],#16 WELS_ASM_AARCH64_FUNC_END -;void McHorVer20WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer20WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer20WidthEq4_AArch64_neon sub x0, x0, #2 movi v0.8h, #20, lsl #0 @@ -413,8 +413,8 @@ w4_h_mc_luma_loop: cbnz x4, w4_h_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer10WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer10WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer10WidthEq16_AArch64_neon sub x0, x0, #2 movi v0.8h, #20, lsl #0 @@ -440,8 +440,8 @@ w16_xy_10_mc_luma_loop: cbnz x4, w16_xy_10_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer10WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer10WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer10WidthEq8_AArch64_neon sub x0, x0, #2 stp d8,d9, [sp,#-16]! @@ -501,8 +501,8 @@ w8_xy_10_mc_luma_loop: ldp d8,d9,[sp],#16 WELS_ASM_AARCH64_FUNC_END -;void McHorVer10WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer10WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer10WidthEq4_AArch64_neon sub x0, x0, #2 movi v0.8h, #20, lsl #0 @@ -541,8 +541,8 @@ w4_xy_10_mc_luma_loop: cbnz x4, w4_xy_10_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer30WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer30WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer30WidthEq16_AArch64_neon sub x0, x0, #2 movi v0.8h, #20, lsl #0 @@ -568,8 +568,8 @@ w16_xy_30_mc_luma_loop: cbnz x4, w16_xy_30_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer30WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer30WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer30WidthEq8_AArch64_neon sub x0, x0, #2 stp d8,d9, [sp,#-16]! @@ -629,8 +629,8 @@ w8_xy_30_mc_luma_loop: ldp d8,d9,[sp],#16 WELS_ASM_AARCH64_FUNC_END -;void McHorVer30WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer30WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer30WidthEq4_AArch64_neon sub x0, x0, #2 movi v0.8h, #20, lsl #0 @@ -669,8 +669,8 @@ w4_xy_30_mc_luma_loop: cbnz x4, w4_xy_30_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer01WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer01WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer01WidthEq16_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -756,8 +756,8 @@ w16_xy_01_mc_luma_loop: cbnz x4, w16_xy_01_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer01WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer01WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer01WidthEq8_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -800,8 +800,8 @@ w8_xy_01_mc_luma_loop: cbnz x4, w8_xy_01_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer01WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer01WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer01WidthEq4_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -860,8 +860,8 @@ w4_xy_01_mc_luma_loop: cbnz x4, w4_xy_01_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer03WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer03WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer03WidthEq16_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -947,8 +947,8 @@ w16_xy_03_mc_luma_loop: cbnz x4, w16_xy_03_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer03WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer03WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer03WidthEq8_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -991,8 +991,8 @@ w8_xy_03_mc_luma_loop: cbnz x4, w8_xy_03_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer03WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer03WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer03WidthEq4_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -1051,8 +1051,8 @@ w4_xy_03_mc_luma_loop: cbnz x4, w4_xy_03_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer02WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer02WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02WidthEq16_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -1138,8 +1138,8 @@ w16_xy_02_mc_luma_loop: cbnz x4, w16_xy_02_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer02WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer02WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02WidthEq8_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -1178,8 +1178,8 @@ w8_xy_02_mc_luma_loop: cbnz x4, w8_xy_02_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer02WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer02WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02WidthEq4_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -1238,8 +1238,8 @@ w4_xy_02_mc_luma_loop: cbnz x4, w4_xy_02_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer22WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer22WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22WidthEq16_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -1408,8 +1408,8 @@ w16_hv_mc_luma_loop: ldp d8, d9, [sp], #16 WELS_ASM_AARCH64_FUNC_END -;void McHorVer22WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer22WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22WidthEq8_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -1483,8 +1483,8 @@ w8_hv_mc_luma_loop: sub x4, x4, #4 cbnz x4, w8_hv_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer22WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer22WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22WidthEq4_AArch64_neon SIGN_EXTENSION x1,w1 @@ -1558,8 +1558,8 @@ w4_hv_mc_luma_loop: sub x4, x4, #4 cbnz x4, w4_hv_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McCopyWidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McCopyWidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McCopyWidthEq16_AArch64_neon //prfm pldl1strm, [x0] SIGN_EXTENSION x1,w1 @@ -1576,8 +1576,8 @@ w16_copy_loop: sub x4, x4, #2 cbnz x4, w16_copy_loop WELS_ASM_AARCH64_FUNC_END -;void McCopyWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McCopyWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McCopyWidthEq8_AArch64_neon //prfm pldl1strm, [x0] SIGN_EXTENSION x1,w1 @@ -1612,8 +1612,8 @@ w4_copy_loop: cbnz x4, w4_copy_loop WELS_ASM_AARCH64_FUNC_END -;void PixStrideAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, -;const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); +//void PixStrideAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, +//const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN PixStrideAvgWidthEq16_AArch64_neon SIGN_EXTENSION x1,w1 @@ -1651,8 +1651,8 @@ enc_w16_pix_avg_loop: cbnz x6, enc_w16_pix_avg_loop WELS_ASM_AARCH64_FUNC_END -;void PixStrideAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, -; const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); +//void PixStrideAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA, +// const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN PixStrideAvgWidthEq8_AArch64_neon //prfm pldl1strm, [x2] //prfm pldl1strm, [x4] @@ -1693,8 +1693,8 @@ enc_w8_pix_avg_loop: sub x6, x6, #4 cbnz x6, enc_w8_pix_avg_loop WELS_ASM_AARCH64_FUNC_END -;void PixelAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, -; const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); +//void PixelAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, +// const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN PixelAvgWidthEq16_AArch64_neon //prfm pldl1strm, [x2] //prfm pldl1strm, [x4] @@ -1740,8 +1740,8 @@ w16_pix_avg_loop: sub x6, x6, #4 cbnz x6, w16_pix_avg_loop WELS_ASM_AARCH64_FUNC_END -;void PixelAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, -; const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); +//void PixelAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, +// const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN PixelAvgWidthEq8_AArch64_neon //prfm pldl1strm, [x2] //prfm pldl1strm, [x4] @@ -1783,8 +1783,8 @@ w8_pix_avg_loop: cbnz x6, w8_pix_avg_loop WELS_ASM_AARCH64_FUNC_END -;void PixelAvgWidthEq4_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, -; const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); +//void PixelAvgWidthEq4_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride, +// const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN PixelAvgWidthEq4_AArch64_neon //prfm pldl1strm, [x2] //prfm pldl1strm, [x4] @@ -1808,8 +1808,8 @@ w4_pix_avg_loop: sub x6, x6, #2 cbnz x6, w4_pix_avg_loop WELS_ASM_AARCH64_FUNC_END -;void McChromaWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t* pWeights, int32_t iHeight); +//void McChromaWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t* pWeights, int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McChromaWidthEq8_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -1867,8 +1867,8 @@ w8_mc_chroma_loop: sub x5, x5, #4 cbnz x5, w8_mc_chroma_loop WELS_ASM_AARCH64_FUNC_END -;void McChromaWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t* pWeights, int32_t iHeight); +//void McChromaWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t* pWeights, int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McChromaWidthEq4_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -1901,8 +1901,8 @@ w4_mc_chroma_loop: cbnz x5, w4_mc_chroma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer20Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight);// width+1 +//void McHorVer20Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// width+1 WELS_ASM_AARCH64_FUNC_BEGIN McHorVer20Width17_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -1935,8 +1935,8 @@ w17_h_mc_luma_loop: cbnz x4, w17_h_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer20Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight);// width+1 +//void McHorVer20Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// width+1 WELS_ASM_AARCH64_FUNC_BEGIN McHorVer20Width9_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -1968,8 +1968,8 @@ w9_h_mc_luma_loop: cbnz x4, w9_h_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer20Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight);// width+1 +//void McHorVer20Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// width+1 WELS_ASM_AARCH64_FUNC_BEGIN McHorVer20Width5_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -1996,8 +1996,8 @@ w5_h_mc_luma_loop: cbnz x4, w5_h_mc_luma_loop WELS_ASM_AARCH64_FUNC_END -;void McHorVer22Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight); +//void McHorVer22Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight); WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22Width17_AArch64_neon stp d8, d9, [sp,#-16]! stp d10, d11, [sp,#-16]! @@ -2203,8 +2203,8 @@ w17_hv_mc_luma_loop: ldp d8, d9, [sp], #16 WELS_ASM_AARCH64_FUNC_END -;void McHorVer22Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight);//width+1&&height+1 +//void McHorVer22Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);//width+1&&height+1 WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22Width9_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -2303,8 +2303,8 @@ w9_hv_mc_luma_loop: st1 {v26.b}[0], [x2], x3 //write 8th Byte : 0 line WELS_ASM_AARCH64_FUNC_END -;void McHorVer22Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight);//width+1&&height+1 +//void McHorVer22Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);//width+1&&height+1 WELS_ASM_AARCH64_FUNC_BEGIN McHorVer22Width5_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -2398,8 +2398,8 @@ w5_hv_mc_luma_loop: st1 {v26.b}[4], [x2], x3 //write 5th Byte : 0 line WELS_ASM_AARCH64_FUNC_END -;void McHorVer02Height17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight);// height+1 +//void McHorVer02Height17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// height+1 WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02Height17_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -2491,8 +2491,8 @@ w17_v_mc_luma_loop: FILTER_6TAG_8BITS2 v2, v3, v4, v5, v6, v7, v20, v0, v1 st1 {v20.16b}, [x2], x3 //write 16Byte : last line WELS_ASM_AARCH64_FUNC_END -;void McHorVer02Height9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight);// height+1 +//void McHorVer02Height9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// height+1 WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02Height9_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -2550,8 +2550,8 @@ w9_v_mc_luma_loop: st1 {v20.8b}, [x2], x3 //write 8Byte : 0 line WELS_ASM_AARCH64_FUNC_END -;void McHorVer02Height5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, -; int32_t iHeight);// height+1 +//void McHorVer02Height5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, +// int32_t iHeight);// height+1 WELS_ASM_AARCH64_FUNC_BEGIN McHorVer02Height5_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 diff --git a/codec/encoder/core/arm64/intra_pred_aarch64_neon.S b/codec/encoder/core/arm64/intra_pred_aarch64_neon.S index 72d6ecae..ef50027d 100644 --- a/codec/encoder/core/arm64/intra_pred_aarch64_neon.S +++ b/codec/encoder/core/arm64/intra_pred_aarch64_neon.S @@ -440,7 +440,7 @@ WELS_ASM_AARCH64_FUNC_END .align 4 intra_1_to_8: .short 5, 10, 15, 20, 25, 30, 35, 40 intra_m7_to_p8: .short -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8 -;void WelsI16x16LumaPredPlane_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); +//void WelsI16x16LumaPredPlane_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride); WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredPlane_AArch64_neon SIGN_EXTENSION x2,w2 sub x3, x1, x2 diff --git a/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S b/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S index 9ec6d5f9..a4162137 100644 --- a/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S +++ b/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S @@ -179,7 +179,7 @@ add \arg7, \arg7, v4.4s .endm -;int32_t WelsIntra8x8Combined3Sad_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, uint8_t*,uint8_t*); +//int32_t WelsIntra8x8Combined3Sad_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, uint8_t*,uint8_t*); WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra8x8Combined3Sad_AArch64_neon ldr x11, [sp, #0] SIGN_EXTENSION x1,w1 @@ -282,7 +282,7 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra8x8Combined3Sad_AArch64_neon str w7, [x4] WELS_ASM_AARCH64_FUNC_END -;int32_t WelsIntra16x16Combined3Sad_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +//int32_t WelsIntra16x16Combined3Sad_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra16x16Combined3Sad_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -337,7 +337,7 @@ sad_intra_16x16_x3_opt_loop0: str w7, [x4] WELS_ASM_AARCH64_FUNC_END -;int32_t WelsIntra4x4Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t*, int32_t, int32_t,int32_t); +//int32_t WelsIntra4x4Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t*, int32_t, int32_t,int32_t); WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra4x4Combined3Satd_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -433,7 +433,7 @@ satd_intra_4x4_x3_opt_end: WELS_ASM_AARCH64_FUNC_END -;int32_t WelsIntra8x8Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, uint8_t*,uint8_t*); +//int32_t WelsIntra8x8Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*, uint8_t*,uint8_t*); WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra8x8Combined3Satd_AArch64_neon ldr x11, [sp, #0] @@ -527,7 +527,7 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra8x8Combined3Satd_AArch64_neon str w7, [x4] WELS_ASM_AARCH64_FUNC_END -;int32_t WelsIntra16x16Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); +//int32_t WelsIntra16x16Combined3Satd_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*); WELS_ASM_AARCH64_FUNC_BEGIN WelsIntra16x16Combined3Satd_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 diff --git a/codec/encoder/core/arm64/memory_aarch64_neon.S b/codec/encoder/core/arm64/memory_aarch64_neon.S index 616fef79..e20500df 100644 --- a/codec/encoder/core/arm64/memory_aarch64_neon.S +++ b/codec/encoder/core/arm64/memory_aarch64_neon.S @@ -33,7 +33,7 @@ #ifdef HAVE_NEON_AARCH64 #include "arm_arch64_common_macro.S" -;void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize); +//void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize); WELS_ASM_AARCH64_FUNC_BEGIN WelsSetMemZero_AArch64_neon eor v0.16b, v0.16b, v0.16b SIGN_EXTENSION x1,w1 diff --git a/codec/encoder/core/arm64/reconstruct_aarch64_neon.S b/codec/encoder/core/arm64/reconstruct_aarch64_neon.S index 5fff0478..45c089b8 100644 --- a/codec/encoder/core/arm64/reconstruct_aarch64_neon.S +++ b/codec/encoder/core/arm64/reconstruct_aarch64_neon.S @@ -490,7 +490,7 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsDctT4_AArch64_neon st4 {v0.d, v1.d, v2.d, v3.d}[0], [x0] WELS_ASM_AARCH64_FUNC_END -;void WelsDctFourT4_AArch64_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); +//void WelsDctFourT4_AArch64_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2); WELS_ASM_AARCH64_FUNC_BEGIN WelsDctFourT4_AArch64_neon SIGN_EXTENSION x2,w2 SIGN_EXTENSION x4,w4 @@ -520,7 +520,7 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsDctFourT4_AArch64_neon st1 {v6.16b, v7.16b}, [x0], #32 .endr WELS_ASM_AARCH64_FUNC_END -;void WelsIDctT4Rec_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct) +//void WelsIDctT4Rec_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct) WELS_ASM_AARCH64_FUNC_BEGIN WelsIDctT4Rec_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -556,7 +556,7 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsIDctT4Rec_AArch64_neon st1 {v1.s}[0],[x0],x1 st1 {v1.s}[1],[x0],x1 WELS_ASM_AARCH64_FUNC_END -;void WelsIDctFourT4Rec_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); +//void WelsIDctFourT4Rec_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, int16_t* pDct); WELS_ASM_AARCH64_FUNC_BEGIN WelsIDctFourT4Rec_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 @@ -650,8 +650,8 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsHadamardT4Dc_AArch64_neon st1 {v4.16b, v5.16b}, [x0] //store WELS_ASM_AARCH64_FUNC_END -;void WelsIDctRecI16x16Dc_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, -; int16_t* pDctDc); +//void WelsIDctRecI16x16Dc_AArch64_neon (uint8_t* pRec, int32_t iStride, uint8_t* pPrediction, int32_t iPredStride, +// int16_t* pDctDc); WELS_ASM_AARCH64_FUNC_BEGIN WelsIDctRecI16x16Dc_AArch64_neon SIGN_EXTENSION x1,w1 SIGN_EXTENSION x3,w3 diff --git a/codec/encoder/core/arm64/svc_motion_estimation_aarch64_neon.S b/codec/encoder/core/arm64/svc_motion_estimation_aarch64_neon.S index cbccb2db..422a5f84 100644 --- a/codec/encoder/core/arm64/svc_motion_estimation_aarch64_neon.S +++ b/codec/encoder/core/arm64/svc_motion_estimation_aarch64_neon.S @@ -32,7 +32,7 @@ #ifdef HAVE_NEON_AARCH64 #include "arm_arch64_common_macro.S" -;int32_t SumOf8x8SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride); +//int32_t SumOf8x8SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride); WELS_ASM_AARCH64_FUNC_BEGIN SumOf8x8SingleBlock_AArch64_neon SIGN_EXTENSION x1,w1 ld1 {v0.d}[0], [x0], x1 @@ -51,7 +51,7 @@ WELS_ASM_AARCH64_FUNC_BEGIN SumOf8x8SingleBlock_AArch64_neon mov x0, v0.d[0] WELS_ASM_AARCH64_FUNC_END -;int32_t SumOf16x16SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride); +//int32_t SumOf16x16SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride); WELS_ASM_AARCH64_FUNC_BEGIN SumOf16x16SingleBlock_AArch64_neon SIGN_EXTENSION x1,w1 ld1 {v0.16b}, [x0], x1 @@ -64,9 +64,9 @@ WELS_ASM_AARCH64_FUNC_BEGIN SumOf16x16SingleBlock_AArch64_neon mov x0, v0.d[0] WELS_ASM_AARCH64_FUNC_END -;void SumOf8x8BlockOfFrame_AArch64_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, - ; const int32_t kiRefStride, -; uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); +//void SumOf8x8BlockOfFrame_AArch64_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, +// const int32_t kiRefStride, +// uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); WELS_ASM_AARCH64_FUNC_BEGIN SumOf8x8BlockOfFrame_AArch64_neon //(uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,const int32_t kiRefStride,uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) //x5: pTimesOfFeatureValue