Add MemoryZero AArch64 NEON code and unit test (UT)

This commit is contained in:
dongzhang 2014-07-08 11:18:45 +08:00
parent 95ac333f3b
commit eace9b7b00
7 changed files with 173 additions and 0 deletions

View File

@ -48,6 +48,7 @@
9AED665019469FC1009A3567 /* welsCodecTrace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9AED664C19469FC1009A3567 /* welsCodecTrace.cpp */; };
9AED66661946A2B3009A3567 /* utils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9AED66651946A2B3009A3567 /* utils.cpp */; };
F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */; };
F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
@ -157,6 +158,7 @@
9AED66651946A2B3009A3567 /* utils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = utils.cpp; path = ../../../common/src/utils.cpp; sourceTree = "<group>"; };
9AED66671946A2C4009A3567 /* utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = utils.h; path = ../../../common/inc/utils.h; sourceTree = "<group>"; };
F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = reconstruct_aarch64_neon.S; path = arm64/reconstruct_aarch64_neon.S; sourceTree = "<group>"; };
F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = memory_aarch64_neon.S; path = arm64/memory_aarch64_neon.S; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -186,6 +188,7 @@
4CB8F2B219235FAC005D6386 /* arm64 */ = {
isa = PBXGroup;
children = (
F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */,
F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */,
4C23BC5F195A77E0003B81FC /* intra_pred_sad_3_opt_aarch64_neon.S */,
4CBC1B82194ACBB400214D9E /* intra_pred_aarch64_neon.S */,
@ -431,6 +434,7 @@
4C23BC60195A77E0003B81FC /* intra_pred_sad_3_opt_aarch64_neon.S in Sources */,
4CE4472B18BC605C0017DF25 /* wels_preprocess.cpp in Sources */,
4CE4470E18BC605C0017DF25 /* au_set.cpp in Sources */,
F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */,
4CBC1B83194ACBB400214D9E /* intra_pred_aarch64_neon.S in Sources */,
4CE4471718BC605C0017DF25 /* mc.cpp in Sources */,
F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */,

View File

@ -0,0 +1,63 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON_AARCH64
.text
#include "arm_arch64_common_macro.S"
// void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize);
//
// Writes zero bytes starting at pDst using NEON stores.
//   x0 : pDst  - destination pointer, post-incremented by every store
//   w1 : iSize - byte count (32-bit signed)
//
// Size handling implemented below:
//   iSize == 32 : two 16-byte stores (32 bytes)
//   iSize <  32 : one 16-byte store plus one 8-byte store (always 24 bytes)
//   iSize >  32 : 64 bytes per iteration until the counter reaches exactly 0,
//                 so iSize has to be a multiple of 64 for the loop to stop.
//
// iSize is a 32-bit argument, so only w1 is defined on entry; the upper half
// of x1 is unspecified by the AArch64 procedure call standard. All arithmetic
// on the counter therefore uses w1 rather than x1.
// Overwrites v0 and the condition flags.
WELS_ASM_AARCH64_FUNC_BEGIN WelsSetMemZero_AArch64_neon
    eor     v0.16b, v0.16b, v0.16b      // v0 = 128 bits of zero
    cmp     w1, #32                     // 32-bit compare: ignore stale upper bits of x1
    b.eq    mem_zero_32_neon_start
    b.lt    mem_zero_24_neon_start

mem_zero_loop:
    subs    w1, w1, #64                 // flags set here; st1 below leaves them untouched
    st1     {v0.16b}, [x0], #16
    st1     {v0.16b}, [x0], #16
    st1     {v0.16b}, [x0], #16
    st1     {v0.16b}, [x0], #16
    b.ne    mem_zero_loop
    b       mem_zero_end

mem_zero_32_neon_start:
    st1     {v0.16b}, [x0], #16
    st1     {v0.16b}, [x0], #16
    b       mem_zero_end

mem_zero_24_neon_start:
    st1     {v0.16b}, [x0], #16
    st1     {v0.8b}, [x0], #8           // low 8 bytes of v0 complete the 24-byte fill

mem_zero_end:
WELS_ASM_AARCH64_FUNC_END
#endif

View File

@ -129,6 +129,8 @@ void WelsSetMemZeroSize8_mmx (void* pDst, int32_t iSize);
void WelsPrefetchZero_mmx (int8_t const* kpDst);
#elif defined(HAVE_NEON)
void WelsSetMemZero_neon (void* pDst, int32_t iSize);
#elif defined(HAVE_NEON_AARCH64)
void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize);
#endif
#if defined(__cplusplus)

View File

@ -179,6 +179,14 @@ int32_t InitFunctionPointers (SWelsFuncPtrList* pFuncList, SWelsSvcCodingParam*
}
#endif
#if defined(HAVE_NEON_AARCH64)
if (uiCpuFlag & WELS_CPU_NEON) {
pFuncList->pfSetMemZeroSize8 = WelsSetMemZero_AArch64_neon;
pFuncList->pfSetMemZeroSize64Aligned16 = WelsSetMemZero_AArch64_neon;
pFuncList->pfSetMemZeroSize64 = WelsSetMemZero_AArch64_neon;
}
#endif
InitExpandPictureFunc (& (pFuncList->sExpandPicFunc), uiCpuFlag);
/* Intra_Prediction_fn*/

View File

@ -61,6 +61,7 @@ ifeq ($(ASM_ARCH), arm64)
ENCODER_ASM_ARM64_SRCS=\
$(ENCODER_SRCDIR)/core/arm64/intra_pred_aarch64_neon.S\
$(ENCODER_SRCDIR)/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S\
$(ENCODER_SRCDIR)/core/arm64/memory_aarch64_neon.S\
$(ENCODER_SRCDIR)/core/arm64/pixel_aarch64_neon.S\
$(ENCODER_SRCDIR)/core/arm64/reconstruct_aarch64_neon.S\

View File

@ -0,0 +1,94 @@
#include<gtest/gtest.h>
#include<math.h>
#include<stdlib.h>
#include<time.h>
#include "cpu_core.h"
#include "cpu.h"
#include "macros.h"
#include "wels_func_ptr_def.h"
#include "../../codec/encoder/core/src/encoder.cpp"
using namespace WelsSVCEnc;
#define MEMORYZEROTEST_NUM 1000
// Checks that every platform-selected memory-zero routine produces exactly the
// same buffer contents as the portable WelsSetMemZero_c reference.
// Both buffers are pre-filled with 255 before each call, and the whole buffer is
// compared afterwards, so bytes written outside the requested range show up as
// a mismatch as well.
TEST (SetMemZeroFunTest, WelsSetMemZero) {
  // Bytes per scratch row; must match the row length given to
  // ENFORCE_STACK_ALIGN_2D below.
  const int32_t kiBufBytes = 64 * 101;

  int32_t iCpuCores = 0;
  SWelsFuncPtrList sFuncPtrList;
  uint32_t uiCpuFlag = WelsCPUFeatureDetect (&iCpuCores);

  /* Start from the C implementation, then override per detected CPU feature */
  sFuncPtrList.pfSetMemZeroSize8           = WelsSetMemZero_c; // confirmed_safe_unsafe_usage
  sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZero_c; // confirmed_safe_unsafe_usage
  sFuncPtrList.pfSetMemZeroSize64          = WelsSetMemZero_c; // confirmed_safe_unsafe_usage

#if defined(X86_ASM)
  if (uiCpuFlag & WELS_CPU_MMXEXT) {
    sFuncPtrList.pfSetMemZeroSize8           = WelsSetMemZeroSize8_mmx;  // confirmed_safe_unsafe_usage
    sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZeroSize64_mmx; // confirmed_safe_unsafe_usage
    sFuncPtrList.pfSetMemZeroSize64          = WelsSetMemZeroSize64_mmx; // confirmed_safe_unsafe_usage
  }
  if (uiCpuFlag & WELS_CPU_SSE2) {
    sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZeroAligned64_sse2; // confirmed_safe_unsafe_usage
  }
#endif//X86_ASM

#if defined(HAVE_NEON)
  if (uiCpuFlag & WELS_CPU_NEON) {
    sFuncPtrList.pfSetMemZeroSize8           = WelsSetMemZero_neon;
    sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZero_neon;
    sFuncPtrList.pfSetMemZeroSize64          = WelsSetMemZero_neon;
  }
#endif

#if defined(HAVE_NEON_AARCH64)
  if (uiCpuFlag & WELS_CPU_NEON) {
    sFuncPtrList.pfSetMemZeroSize8           = WelsSetMemZero_AArch64_neon;
    sFuncPtrList.pfSetMemZeroSize64Aligned16 = WelsSetMemZero_AArch64_neon;
    sFuncPtrList.pfSetMemZeroSize64          = WelsSetMemZero_AArch64_neon;
  }
#endif

  // Row 0 receives the reference result, row 1 the result of the routine under test.
  ENFORCE_STACK_ALIGN_2D (uint8_t, pInputAlign, 2, 64*101, 16)

  // Pass 1: random multiples of 64 bytes, written from the start of each row,
  // through the pfSetMemZeroSize64Aligned16 entry.
  for (int32_t iRound = 0; iRound < MEMORYZEROTEST_NUM; ++iRound) {
    memset (pInputAlign[0], 255, kiBufBytes);
    memset (pInputAlign[1], 255, kiBufBytes);
    const int32_t iLen = 64 * (rand() % 100 + 1);

    WelsSetMemZero_c (pInputAlign[0], iLen);
    sFuncPtrList.pfSetMemZeroSize64Aligned16 (pInputAlign[1], iLen);

    for (int32_t i = 0; i < kiBufBytes; ++i) {
      ASSERT_EQ (pInputAlign[0][i], pInputAlign[1][i]);
    }
  }

  // Pass 2: random multiples of 64 bytes again, but starting one byte into
  // each row, through the pfSetMemZeroSize64 entry.
  for (int32_t iRound = 0; iRound < MEMORYZEROTEST_NUM; ++iRound) {
    memset (pInputAlign[0], 255, kiBufBytes);
    memset (pInputAlign[1], 255, kiBufBytes);
    const int32_t iLen = 64 * (rand() % 100 + 1);

    WelsSetMemZero_c (pInputAlign[0] + 1, iLen);
    sFuncPtrList.pfSetMemZeroSize64 (pInputAlign[1] + 1, iLen);

    for (int32_t i = 0; i < kiBufBytes; ++i) {
      ASSERT_EQ (pInputAlign[0][i], pInputAlign[1][i]);
    }
  }

  // Pass 3: the two fixed small sizes (32 then 24 bytes), one byte into each
  // row, through the pfSetMemZeroSize8 entry.
  const int32_t kiSmallSizes[2] = { 32, 24 };
  for (int32_t iCase = 0; iCase < 2; ++iCase) {
    const int32_t iLen = kiSmallSizes[iCase];
    memset (pInputAlign[0], 255, kiBufBytes);
    memset (pInputAlign[1], 255, kiBufBytes);

    WelsSetMemZero_c (pInputAlign[0] + 1, iLen);
    sFuncPtrList.pfSetMemZeroSize8 (pInputAlign[1] + 1, iLen);

    for (int32_t i = 0; i < kiBufBytes; ++i) {
      ASSERT_EQ (pInputAlign[0][i], pInputAlign[1][i]);
    }
  }
}

View File

@ -7,6 +7,7 @@ ENCODER_UNITTEST_CPP_SRCS=\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_ExpGolomb.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_GetIntraPredictor.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryAlloc.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryZero.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MotionEstimate.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_Reconstruct.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_Sample.cpp\