Merge pull request #1203 from dongzha/AddAQArm64
Add ARM64 Adaptive Quantization code and unit tests (UT)
This commit is contained in:
commit
6ba537bee5
1
Makefile
1
Makefile
@ -89,6 +89,7 @@ ENCODER_INCLUDES += \
|
||||
PROCESSING_INCLUDES += \
|
||||
-I$(SRC_PATH)codec/processing/interface \
|
||||
-I$(SRC_PATH)codec/processing/src/common \
|
||||
-I$(SRC_PATH)codec/processing/src/adaptivequantization \
|
||||
-I$(SRC_PATH)codec/processing/src/scrolldetection
|
||||
|
||||
GTEST_INCLUDES += \
|
||||
|
@ -31,6 +31,7 @@
|
||||
549947F2196A3FB400BA3D87 /* ScrollDetectionFuncs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D5196A3FB400BA3D87 /* ScrollDetectionFuncs.cpp */; };
|
||||
549947F3196A3FB400BA3D87 /* vaacalcfuncs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D8196A3FB400BA3D87 /* vaacalcfuncs.cpp */; };
|
||||
549947F4196A3FB400BA3D87 /* vaacalculation.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D9196A3FB400BA3D87 /* vaacalculation.cpp */; };
|
||||
6C749B78197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXCopyFilesBuildPhase section */
|
||||
@ -91,6 +92,7 @@
|
||||
549947D8196A3FB400BA3D87 /* vaacalcfuncs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = vaacalcfuncs.cpp; sourceTree = "<group>"; };
|
||||
549947D9196A3FB400BA3D87 /* vaacalculation.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = vaacalculation.cpp; sourceTree = "<group>"; };
|
||||
549947DA196A3FB400BA3D87 /* vaacalculation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vaacalculation.h; sourceTree = "<group>"; };
|
||||
6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = adaptive_quantization_aarch64_neon.S; path = arm64/adaptive_quantization_aarch64_neon.S; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
@ -108,6 +110,7 @@
|
||||
4CC6094D197E008B00BE8B8B /* arm64 */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */,
|
||||
4CC6094E197E009D00BE8B8B /* down_sample_aarch64_neon.S */,
|
||||
);
|
||||
name = arm64;
|
||||
@ -337,6 +340,7 @@
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
6C749B78197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S in Sources */,
|
||||
549947F4196A3FB400BA3D87 /* vaacalculation.cpp in Sources */,
|
||||
549947E9196A3FB400BA3D87 /* ComplexityAnalysis.cpp in Sources */,
|
||||
549947E3196A3FB400BA3D87 /* vaa_calc_neon.S in Sources */,
|
||||
|
@ -235,6 +235,11 @@ void CAdaptiveQuantization::WelsInitVarFunc (PVarFunc& pfVar, int32_t iCpuFlag)
|
||||
pfVar = SampleVariance16x16_neon;
|
||||
}
|
||||
#endif
|
||||
#ifdef HAVE_NEON_AARCH64
|
||||
if (iCpuFlag & WELS_CPU_NEON) {
|
||||
pfVar = SampleVariance16x16_AArch64_neon;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void SampleVariance16x16_c (uint8_t* pRefY, int32_t iRefStride, uint8_t* pSrcY, int32_t iSrcStride,
|
||||
|
@ -68,6 +68,12 @@ VarFunc SampleVariance16x16_neon;
|
||||
WELSVP_EXTERN_C_END
|
||||
#endif
|
||||
|
||||
// AArch64 NEON variant of the 16x16 sample-variance routine. It is only
// declared when the build defines HAVE_NEON_AARCH64, and it shares the VarFunc
// function type with the other SampleVariance16x16_* variants so that it can be
// assigned to the same function pointer.
// NOTE(review): WELSVP_EXTERN_C_BEGIN/END presumably wrap the declaration in
// extern "C" so the name matches the assembly symbol - confirm.
#ifdef HAVE_NEON_AARCH64
|
||||
WELSVP_EXTERN_C_BEGIN
|
||||
VarFunc SampleVariance16x16_AArch64_neon;
|
||||
WELSVP_EXTERN_C_END
|
||||
#endif
|
||||
|
||||
class CAdaptiveQuantization : public IStrategy {
|
||||
public:
|
||||
CAdaptiveQuantization (int32_t iCpuFlag);
|
||||
|
@ -0,0 +1,88 @@
|
||||
/*!
|
||||
* \copy
|
||||
* Copyright (c) 2013, Cisco Systems
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef HAVE_NEON_AARCH64
|
||||
.text
|
||||
#include "arm_arch64_common_macro.S"
|
||||
// SampleVariance16x16_AArch64_neon
//
// Computes, over a 16x16 block of bytes, the same two values as the C
// reference implementation:
//   motion  index = (sum(|src-ref|^2) >> 8) - ((sum(|src-ref|) >> 8) ^ 2)
//   texture index = (sum(src^2)       >> 8) - ((sum(src)       >> 8) ^ 2)
// and stores them as two consecutive 16-bit values at [x4].
//
// Arguments (assuming the VarFunc signature matches SampleVariance16x16_c:
// pRefY, iRefStride, pSrcY, iSrcStride, pMotionTexture):
//   x0 = reference rows, w1 = reference stride (int32_t)
//   x2 = source rows,    w3 = source stride    (int32_t)
//   x4 = output pointer
//
// Accumulators kept across the 16 rows:
//   v1 = sum_cur (8 x u16)   v2 = sum (8 x u16)
//   v3 = sqr_cur (4 x u32)   v4 = sqr (4 x u32)
// Only v0-v7 are used, so no callee-saved SIMD register needs preserving.
WELS_ASM_AARCH64_FUNC_BEGIN SampleVariance16x16_AArch64_neon
    // The strides are 32-bit ints; the upper halves of x1/x3 are unspecified
    // on entry, so sign-extend before using them as 64-bit post-index offsets.
    sxtw x1, w1
    sxtw x3, w3

    // Row 0: initialise the four accumulators.
    ld1 {v1.16b}, [x0], x1          // load 16 ref bytes, advance by ref stride
    ld1 {v0.16b}, [x2], x3          // load 16 src bytes, advance by src stride
    uabd v2.16b, v0.16b, v1.16b     // |src - ref| per byte
    umull v3.8h, v2.8b, v2.8b       // diff^2, low 8 lanes
    umull2 v4.8h, v2.16b, v2.16b    // diff^2, high 8 lanes
    uaddlp v4.4s, v4.8h
    uadalp v4.4s, v3.8h             // sqr
    uaddlp v2.8h, v2.16b            // sum

    uaddlp v1.8h, v0.16b            // sum_cur

    umull v3.8h, v0.8b, v0.8b       // src^2, low 8 lanes
    umull2 v5.8h, v0.16b, v0.16b    // src^2, high 8 lanes
    uaddlp v3.4s, v3.8h
    uadalp v3.4s, v5.8h             // sqr_cur

    // Rows 1..15: accumulate into v1..v4 (v5, v6, v7 are scratch).
.rept 15
    ld1 {v5.16b}, [x0], x1          // load 16 ref bytes, advance by ref stride
    ld1 {v0.16b}, [x2], x3          // load 16 src bytes, advance by src stride

    uabd v6.16b, v0.16b, v5.16b     // |src - ref| per byte

    // v1 accumulates sum_cur
    uadalp v1.8h, v0.16b

    // v4 accumulates sqr
    umull v5.8h, v6.8b, v6.8b
    umull2 v7.8h, v6.16b, v6.16b
    uadalp v4.4s, v5.8h             // sqr
    uadalp v4.4s, v7.8h             // sqr

    // v2 accumulates sum
    uadalp v2.8h, v6.16b

    // v3 accumulates sqr_cur
    umull v5.8h, v0.8b, v0.8b
    umull2 v7.8h, v0.16b, v0.16b
    uadalp v3.4s, v5.8h             // sqr_cur
    uadalp v3.4s, v7.8h             // sqr_cur
.endr

    // Horizontal reductions and final arithmetic.
    uaddlv s2, v2.8h                // sum
    uaddlv s1, v1.8h                // sum_cur
    ins v2.s[1], v1.s[0]            // v2.s = {sum, sum_cur}
    shrn v2.4h, v2.4s, #8           // {sum >> 8, sum_cur >> 8} as u16
    mul v2.4h, v2.4h, v2.4h         // {(sum>>8)^2, (sum_cur>>8)^2}
    uaddlv d4, v4.4s                // sqr
    uaddlv d3, v3.4s                // sqr_cur
    ins v4.s[1], v3.s[0]            // v4.s = {sqr, sqr_cur}
    shrn v4.4h, v4.4s, #8           // {sqr >> 8, sqr_cur >> 8} as u16
    sub v4.4h, v4.4h, v2.4h         // {motion index, texture index}
    st1 {v4.s}[0], [x4]             // store both 16-bit results (4 bytes)
WELS_ASM_AARCH64_FUNC_END
|
||||
#endif
|
@ -42,6 +42,7 @@ endif
|
||||
|
||||
ifeq ($(ASM_ARCH), arm64)
|
||||
PROCESSING_ASM_ARM64_SRCS=\
|
||||
$(PROCESSING_SRCDIR)/src/arm64/adaptive_quantization_aarch64_neon.S\
|
||||
$(PROCESSING_SRCDIR)/src/arm64/down_sample_aarch64_neon.S\
|
||||
|
||||
PROCESSING_OBJS += $(PROCESSING_ASM_ARM64_SRCS:.S=.$(OBJ))
|
||||
|
76
test/processing/ProcessUT_AdaptiveQuantization.cpp
Normal file
76
test/processing/ProcessUT_AdaptiveQuantization.cpp
Normal file
@ -0,0 +1,76 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "cpu.h"
|
||||
#include "cpu_core.h"
|
||||
#include "util.h"
|
||||
#include "macros.h"
|
||||
#include "IWelsVP.h"
|
||||
#include "AdaptiveQuantization.h"
|
||||
|
||||
|
||||
using namespace nsWelsVP;
|
||||
|
||||
// Fills the first Len bytes of p with pseudo-random values in [0, 255],
// drawing exactly one rand() value per byte in ascending index order.
// Does nothing when Len <= 0.
static void FillWithRandomData (uint8_t* p, int32_t Len) {
  int32_t iPos = 0;
  while (iPos < Len) {
    p[iPos] = static_cast<uint8_t> (rand() % 256);
    ++iPos;
  }
}
|
||||
|
||||
void SampleVariance16x16_ref (uint8_t* pRefY, int32_t iRefStride, uint8_t* pSrcY, int32_t iSrcStride,
|
||||
SMotionTextureUnit* pMotionTexture) {
|
||||
uint32_t uiCurSquare = 0, uiSquare = 0;
|
||||
uint16_t uiCurSum = 0, uiSum = 0;
|
||||
|
||||
for (int32_t y = 0; y < MB_WIDTH_LUMA; y++) {
|
||||
for (int32_t x = 0; x < MB_WIDTH_LUMA; x++) {
|
||||
uint32_t uiDiff = WELS_ABS (pRefY[x] - pSrcY[x]);
|
||||
uiSum += uiDiff;
|
||||
uiSquare += uiDiff * uiDiff;
|
||||
|
||||
uiCurSum += pSrcY[x];
|
||||
uiCurSquare += pSrcY[x] * pSrcY[x];
|
||||
}
|
||||
pRefY += iRefStride;
|
||||
pSrcY += iSrcStride;
|
||||
}
|
||||
|
||||
uiSum = uiSum >> 8;
|
||||
pMotionTexture->uiMotionIndex = (uiSquare >> 8) - (uiSum * uiSum);
|
||||
|
||||
uiCurSum = uiCurSum >> 8;
|
||||
pMotionTexture->uiTextureIndex = (uiCurSquare >> 8) - (uiCurSum * uiCurSum);
|
||||
}
|
||||
|
||||
/*
 * GENERATE_AQTEST(method) defines the gtest case AdaptiveQuantization.<method>.
 *
 * The case runs `method` and SampleVariance16x16_ref on the same inputs and
 * requires both outputs (uiMotionIndex and uiTextureIndex) to match:
 *   round 1 - both planes filled with random bytes;
 *   round 2 - reference plane all 0, source plane all 255 (extreme values).
 * The reference plane uses a row stride of 32 and the source plane a row
 * stride of 48, so the two stride arguments are exercised independently.
 * NOTE(review): ENFORCE_STACK_ALIGN_1D presumably declares a 16-byte aligned
 * stack buffer of the given size - confirm against macros.h.
 */
#define GENERATE_AQTEST(method) \
TEST (AdaptiveQuantization, method) {\
  ENFORCE_STACK_ALIGN_1D (uint8_t, pRefY,32*16,16)\
  ENFORCE_STACK_ALIGN_1D (uint8_t, pSrcY,48*16,16)\
  SMotionTextureUnit pMotionTexture[2];\
  /* round 1: random content */\
  FillWithRandomData (pRefY,32*16);\
  FillWithRandomData (pSrcY,48*16);\
  SampleVariance16x16_ref (pRefY,32,pSrcY,48,&pMotionTexture[0]);\
  method(pRefY,32,pSrcY,48,&pMotionTexture[1]);\
  ASSERT_EQ(pMotionTexture[0].uiMotionIndex,pMotionTexture[1].uiMotionIndex);\
  ASSERT_EQ(pMotionTexture[0].uiTextureIndex,pMotionTexture[1].uiTextureIndex);\
  /* round 2: extreme values (ref = 0, src = 255) */\
  memset (pRefY,0,32*16);\
  memset (pSrcY,255,48*16);\
  SampleVariance16x16_ref (pRefY,32,pSrcY,48,&pMotionTexture[0]);\
  method(pRefY,32,pSrcY,48,&pMotionTexture[1]);\
  ASSERT_EQ(pMotionTexture[0].uiMotionIndex,pMotionTexture[1].uiMotionIndex);\
  ASSERT_EQ(pMotionTexture[0].uiTextureIndex,pMotionTexture[1].uiTextureIndex);\
}
|
||||
|
||||
// Instantiate one AdaptiveQuantization test case per implementation.
// The C implementation is always tested; each optimised variant is tested only
// when its build macro is defined.
GENERATE_AQTEST (SampleVariance16x16_c)
|
||||
// SSE2 variant, only when X86_ASM is defined.
#if defined(X86_ASM)
|
||||
GENERATE_AQTEST (SampleVariance16x16_sse2)
|
||||
#endif
|
||||
|
||||
// NEON variant, only when HAVE_NEON is defined.
#if defined(HAVE_NEON)
|
||||
GENERATE_AQTEST (SampleVariance16x16_neon)
|
||||
#endif
|
||||
|
||||
// AArch64 NEON variant, only when HAVE_NEON_AARCH64 is defined.
#if defined(HAVE_NEON_AARCH64)
|
||||
GENERATE_AQTEST (SampleVariance16x16_AArch64_neon)
|
||||
#endif
|
||||
|
@ -1,5 +1,6 @@
|
||||
PROCESSING_UNITTEST_SRCDIR=test/processing
|
||||
PROCESSING_UNITTEST_CPP_SRCS=\
|
||||
$(PROCESSING_UNITTEST_SRCDIR)/ProcessUT_AdaptiveQuantization.cpp\
|
||||
$(PROCESSING_UNITTEST_SRCDIR)/ProcessUT_ScrollDetection.cpp\
|
||||
|
||||
PROCESSING_UNITTEST_OBJS += $(PROCESSING_UNITTEST_CPP_SRCS:.cpp=.$(OBJ))
|
||||
|
Loading…
x
Reference in New Issue
Block a user