add MemoryZero Arm64 code and UT
This commit is contained in:
parent
95ac333f3b
commit
eace9b7b00
@ -48,6 +48,7 @@
|
||||
9AED665019469FC1009A3567 /* welsCodecTrace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9AED664C19469FC1009A3567 /* welsCodecTrace.cpp */; };
|
||||
9AED66661946A2B3009A3567 /* utils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9AED66651946A2B3009A3567 /* utils.cpp */; };
|
||||
F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */; };
|
||||
F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXCopyFilesBuildPhase section */
|
||||
@ -157,6 +158,7 @@
|
||||
9AED66651946A2B3009A3567 /* utils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = utils.cpp; path = ../../../common/src/utils.cpp; sourceTree = "<group>"; };
|
||||
9AED66671946A2C4009A3567 /* utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = utils.h; path = ../../../common/inc/utils.h; sourceTree = "<group>"; };
|
||||
F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = reconstruct_aarch64_neon.S; path = arm64/reconstruct_aarch64_neon.S; sourceTree = "<group>"; };
|
||||
F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = memory_aarch64_neon.S; path = arm64/memory_aarch64_neon.S; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
@ -186,6 +188,7 @@
|
||||
4CB8F2B219235FAC005D6386 /* arm64 */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */,
|
||||
F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */,
|
||||
4C23BC5F195A77E0003B81FC /* intra_pred_sad_3_opt_aarch64_neon.S */,
|
||||
4CBC1B82194ACBB400214D9E /* intra_pred_aarch64_neon.S */,
|
||||
@ -431,6 +434,7 @@
|
||||
4C23BC60195A77E0003B81FC /* intra_pred_sad_3_opt_aarch64_neon.S in Sources */,
|
||||
4CE4472B18BC605C0017DF25 /* wels_preprocess.cpp in Sources */,
|
||||
4CE4470E18BC605C0017DF25 /* au_set.cpp in Sources */,
|
||||
F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */,
|
||||
4CBC1B83194ACBB400214D9E /* intra_pred_aarch64_neon.S in Sources */,
|
||||
4CE4471718BC605C0017DF25 /* mc.cpp in Sources */,
|
||||
F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */,
|
||||
|
63
codec/encoder/core/arm64/memory_aarch64_neon.S
Normal file
63
codec/encoder/core/arm64/memory_aarch64_neon.S
Normal file
@ -0,0 +1,63 @@
|
||||
/*!
|
||||
* \copy
|
||||
* Copyright (c) 2013, Cisco Systems
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef HAVE_NEON_AARCH64
|
||||
.text
|
||||
#include "arm_arch64_common_macro.S"
|
||||
|
||||
|
||||
/*
 * void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize);
 *
 * Writes zero bytes starting at pDst using NEON stores.
 *
 *   x0 : pDst  - destination pointer; advanced by the post-indexed stores.
 *   w1 : iSize - byte count. It is a 32-bit int, so only the low 32 bits of
 *                the argument register are meaningful; x1 is never read.
 *
 * Size handling, exactly as implemented below:
 *   iSize  > 32 : zeroed 64 bytes per iteration. The loop only exits when the
 *                 remaining count reaches exactly zero, so iSize must be a
 *                 positive multiple of 64.
 *   iSize == 32 : two 16-byte stores.
 *   iSize  < 32 : always writes exactly 24 bytes (16 + 8), whatever the value.
 *
 * Modifies x0, w1, v0 and the condition flags.
 */
WELS_ASM_AARCH64_FUNC_BEGIN WelsSetMemZero_AArch64_neon
    eor     v0.16b, v0.16b, v0.16b      // v0 = 0, the source of every store below

    // Compare on w1, not x1: the upper half of the register is not defined
    // for an int32_t argument and must not influence path selection.
    cmp     w1, #32
    b.eq    mem_zero_32_neon_start
    b.lt    mem_zero_24_neon_start

mem_zero_loop:
    subs    w1, w1, #64                 // remaining -= 64, sets Z when done
    st1     {v0.16b}, [x0], #16
    st1     {v0.16b}, [x0], #16
    st1     {v0.16b}, [x0], #16
    st1     {v0.16b}, [x0], #16
    b.ne    mem_zero_loop               // flags still come from the subs above
    b       mem_zero_end

mem_zero_32_neon_start:
    st1     {v0.16b}, [x0], #16
    st1     {v0.16b}, [x0], #16
    b       mem_zero_end

mem_zero_24_neon_start:
    st1     {v0.16b}, [x0], #16
    st1     {v0.8b}, [x0], #8           // low 8 bytes of v0 complete the 24

mem_zero_end:

WELS_ASM_AARCH64_FUNC_END
|
||||
|
||||
#endif
|
@ -129,6 +129,8 @@ void WelsSetMemZeroSize8_mmx (void* pDst, int32_t iSize);
|
||||
void WelsPrefetchZero_mmx (int8_t const* kpDst);
|
||||
#elif defined(HAVE_NEON)
|
||||
void WelsSetMemZero_neon (void* pDst, int32_t iSize);
|
||||
#elif defined(HAVE_NEON_AARCH64)
|
||||
void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize);
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
@ -179,6 +179,14 @@ int32_t InitFunctionPointers (SWelsFuncPtrList* pFuncList, SWelsSvcCodingParam*
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_NEON_AARCH64)
|
||||
if (uiCpuFlag & WELS_CPU_NEON) {
|
||||
pFuncList->pfSetMemZeroSize8 = WelsSetMemZero_AArch64_neon;
|
||||
pFuncList->pfSetMemZeroSize64Aligned16 = WelsSetMemZero_AArch64_neon;
|
||||
pFuncList->pfSetMemZeroSize64 = WelsSetMemZero_AArch64_neon;
|
||||
}
|
||||
#endif
|
||||
|
||||
InitExpandPictureFunc (& (pFuncList->sExpandPicFunc), uiCpuFlag);
|
||||
|
||||
/* Intra_Prediction_fn*/
|
||||
|
@ -61,6 +61,7 @@ ifeq ($(ASM_ARCH), arm64)
|
||||
ENCODER_ASM_ARM64_SRCS=\
|
||||
$(ENCODER_SRCDIR)/core/arm64/intra_pred_aarch64_neon.S\
|
||||
$(ENCODER_SRCDIR)/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S\
|
||||
$(ENCODER_SRCDIR)/core/arm64/memory_aarch64_neon.S\
|
||||
$(ENCODER_SRCDIR)/core/arm64/pixel_aarch64_neon.S\
|
||||
$(ENCODER_SRCDIR)/core/arm64/reconstruct_aarch64_neon.S\
|
||||
|
||||
|
94
test/encoder/EncUT_MemoryZero.cpp
Normal file
94
test/encoder/EncUT_MemoryZero.cpp
Normal file
@ -0,0 +1,94 @@
|
||||
#include<gtest/gtest.h>
|
||||
#include<math.h>
|
||||
#include<stdlib.h>
|
||||
#include<time.h>
|
||||
|
||||
#include "cpu_core.h"
|
||||
#include "cpu.h"
|
||||
#include "macros.h"
|
||||
#include "wels_func_ptr_def.h"
|
||||
#include "../../codec/encoder/core/src/encoder.cpp"
|
||||
|
||||
using namespace WelsSVCEnc;
|
||||
#define MEMORYZEROTEST_NUM 1000
|
||||
|
||||
// Cross-checks the zero-fill routines chosen for the detected CPU against the
// portable WelsSetMemZero_c reference. Both buffers are pre-filled with 0xFF
// and compared over their full length, so a routine that clears too few or
// too many bytes relative to the reference makes the comparison fail.
TEST (SetMemZeroFunTest, WelsSetMemZero) {
  const int32_t kiBufSize = 64 * 101;   // bytes in each of the two buffers
  int32_t iCpuCores = 0;
  int32_t iLen = 64;
  SWelsFuncPtrList sFuncPtrList;
  uint32_t uiCpuFlag = WelsCPUFeatureDetect (&iCpuCores);

  // Portable C routine first; the sections below override it per CPU feature.
  sFuncPtrList.pfSetMemZeroSize8           = WelsSetMemZero_c;
  sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZero_c;
  sFuncPtrList.pfSetMemZeroSize64          = WelsSetMemZero_c;

#if defined(X86_ASM)
  if (uiCpuFlag & WELS_CPU_MMXEXT) {
    sFuncPtrList.pfSetMemZeroSize8           = WelsSetMemZeroSize8_mmx;
    sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZeroSize64_mmx;
    sFuncPtrList.pfSetMemZeroSize64          = WelsSetMemZeroSize64_mmx;
  }
  if (uiCpuFlag & WELS_CPU_SSE2) {
    sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZeroAligned64_sse2;
  }
#endif//X86_ASM

#if defined(HAVE_NEON)
  if (uiCpuFlag & WELS_CPU_NEON) {
    sFuncPtrList.pfSetMemZeroSize8           = WelsSetMemZero_neon;
    sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZero_neon;
    sFuncPtrList.pfSetMemZeroSize64          = WelsSetMemZero_neon;
  }
#endif

#if defined(HAVE_NEON_AARCH64)
  if (uiCpuFlag & WELS_CPU_NEON) {
    sFuncPtrList.pfSetMemZeroSize8           = WelsSetMemZero_AArch64_neon;
    sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZero_AArch64_neon;
    sFuncPtrList.pfSetMemZeroSize64          = WelsSetMemZero_AArch64_neon;
  }
#endif

  // Row 0 receives the reference result, row 1 the routine under test.
  ENFORCE_STACK_ALIGN_2D (uint8_t, pInputAlign, 2, kiBufSize, 16)

  // Random multiples of 64 (64..6400 bytes), written at the start of each row.
  for (int32_t iRound = 0; iRound < MEMORYZEROTEST_NUM; ++iRound) {
    memset (pInputAlign[0], 255, kiBufSize);
    memset (pInputAlign[1], 255, kiBufSize);
    iLen = 64 * (1 + (rand() % 100));
    WelsSetMemZero_c (pInputAlign[0], iLen);
    sFuncPtrList.pfSetMemZeroSize64Aligned16 (pInputAlign[1], iLen);
    for (int32_t iIdx = 0; iIdx < kiBufSize; ++iIdx) {
      ASSERT_EQ (pInputAlign[0][iIdx], pInputAlign[1][iIdx]);
    }
  }

  // Same size range, destination shifted one byte past the start of each row.
  for (int32_t iRound = 0; iRound < MEMORYZEROTEST_NUM; ++iRound) {
    memset (pInputAlign[0], 255, kiBufSize);
    memset (pInputAlign[1], 255, kiBufSize);
    iLen = 64 * (1 + (rand() % 100));
    WelsSetMemZero_c (pInputAlign[0] + 1, iLen);
    sFuncPtrList.pfSetMemZeroSize64 (pInputAlign[1] + 1, iLen);
    for (int32_t iIdx = 0; iIdx < kiBufSize; ++iIdx) {
      ASSERT_EQ (pInputAlign[0][iIdx], pInputAlign[1][iIdx]);
    }
  }

  // Fixed small sizes through the Size8 entry point, again shifted by one byte.
  static const int32_t kiSmallLens[2] = { 32, 24 };
  for (int32_t iCase = 0; iCase < 2; ++iCase) {
    memset (pInputAlign[0], 255, kiBufSize);
    memset (pInputAlign[1], 255, kiBufSize);
    iLen = kiSmallLens[iCase];
    WelsSetMemZero_c (pInputAlign[0] + 1, iLen);
    sFuncPtrList.pfSetMemZeroSize8 (pInputAlign[1] + 1, iLen);
    for (int32_t iIdx = 0; iIdx < kiBufSize; ++iIdx) {
      ASSERT_EQ (pInputAlign[0][iIdx], pInputAlign[1][iIdx]);
    }
  }
}
|
||||
|
||||
|
@ -7,6 +7,7 @@ ENCODER_UNITTEST_CPP_SRCS=\
|
||||
$(ENCODER_UNITTEST_SRCDIR)/EncUT_ExpGolomb.cpp\
|
||||
$(ENCODER_UNITTEST_SRCDIR)/EncUT_GetIntraPredictor.cpp\
|
||||
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryAlloc.cpp\
|
||||
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryZero.cpp\
|
||||
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MotionEstimate.cpp\
|
||||
$(ENCODER_UNITTEST_SRCDIR)/EncUT_Reconstruct.cpp\
|
||||
$(ENCODER_UNITTEST_SRCDIR)/EncUT_Sample.cpp\
|
||||
|
Loading…
x
Reference in New Issue
Block a user