Merge pull request #2531 from GuangweiWang/enable-disable-AVX2
Add an option to enable/disable AVX2
This commit is contained in:
commit
1f770c488c
@ -1,6 +1,18 @@
|
|||||||
|
#for x86
|
||||||
|
HAVE_AVX2 := true
|
||||||
|
|
||||||
ifneq ($(filter %86 x86_64, $(ARCH)),)
|
ifneq ($(filter %86 x86_64, $(ARCH)),)
|
||||||
include $(SRC_PATH)build/x86-common.mk
|
include $(SRC_PATH)build/x86-common.mk
|
||||||
|
ifeq ($(USE_ASM), Yes)
|
||||||
|
ifeq ($(HAVE_AVX2), true)
|
||||||
|
CFLAGS += -DHAVE_AVX2
|
||||||
|
CXXFLAGS += -DHAVE_AVX2
|
||||||
|
ASMFLAGS += -DHAVE_AVX2
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
#for arm
|
||||||
ifneq ($(filter-out arm64, $(filter arm%, $(ARCH))),)
|
ifneq ($(filter-out arm64, $(filter arm%, $(ARCH))),)
|
||||||
ifeq ($(USE_ASM), Yes)
|
ifeq ($(USE_ASM), Yes)
|
||||||
ASM_ARCH = arm
|
ASM_ARCH = arm
|
||||||
@ -8,6 +20,8 @@ ASMFLAGS += -I$(SRC_PATH)codec/common/arm/
|
|||||||
CFLAGS += -DHAVE_NEON
|
CFLAGS += -DHAVE_NEON
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
#for arm64
|
||||||
ifneq ($(filter arm64 aarch64, $(ARCH)),)
|
ifneq ($(filter arm64 aarch64, $(ARCH)),)
|
||||||
ifeq ($(USE_ASM), Yes)
|
ifeq ($(USE_ASM), Yes)
|
||||||
ASM_ARCH = arm64
|
ASM_ARCH = arm64
|
||||||
|
@ -56,7 +56,6 @@
|
|||||||
#define WELS_CPU_SSE42 0x00000400 /* sse 4.2 */
|
#define WELS_CPU_SSE42 0x00000400 /* sse 4.2 */
|
||||||
|
|
||||||
/* CPU features application extensive */
|
/* CPU features application extensive */
|
||||||
#define WELS_CPU_AVX 0x00000800 /* Advanced Vector eXtensions */
|
|
||||||
#define WELS_CPU_FPU 0x00001000 /* x87-FPU on chip */
|
#define WELS_CPU_FPU 0x00001000 /* x87-FPU on chip */
|
||||||
#define WELS_CPU_HTT 0x00002000 /* Hyper-Threading Technology (HTT), Multi-threading enabled feature:
|
#define WELS_CPU_HTT 0x00002000 /* Hyper-Threading Technology (HTT), Multi-threading enabled feature:
|
||||||
physical processor package is capable of supporting more than one logic processor
|
physical processor package is capable of supporting more than one logic processor
|
||||||
@ -67,7 +66,13 @@
|
|||||||
#define WELS_CPU_MOVBE 0x00008000 /* MOVBE instruction */
|
#define WELS_CPU_MOVBE 0x00008000 /* MOVBE instruction */
|
||||||
#define WELS_CPU_AES 0x00010000 /* AES instruction extensions */
|
#define WELS_CPU_AES 0x00010000 /* AES instruction extensions */
|
||||||
#define WELS_CPU_FMA 0x00020000 /* AVX VEX FMA instruction sets */
|
#define WELS_CPU_FMA 0x00020000 /* AVX VEX FMA instruction sets */
|
||||||
|
#define WELS_CPU_AVX 0x00000800 /* Advanced Vector eXtensions */
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX2
|
||||||
#define WELS_CPU_AVX2 0x00040000 /* AVX2 */
|
#define WELS_CPU_AVX2 0x00040000 /* AVX2 */
|
||||||
|
#else
|
||||||
|
#define WELS_CPU_AVX2 0x00000000 /* !AVX2 */
|
||||||
|
#endif
|
||||||
|
|
||||||
#define WELS_CPU_CACHELINE_16 0x10000000 /* CacheLine Size 16 */
|
#define WELS_CPU_CACHELINE_16 0x10000000 /* CacheLine Size 16 */
|
||||||
#define WELS_CPU_CACHELINE_32 0x20000000 /* CacheLine Size 32 */
|
#define WELS_CPU_CACHELINE_32 0x20000000 /* CacheLine Size 32 */
|
||||||
|
@ -678,6 +678,7 @@ WELS_EXTERN WelsIDctRecI16x16Dc_sse2
|
|||||||
; AVX2 functions
|
; AVX2 functions
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
|
|
||||||
|
%ifdef HAVE_AVX2
|
||||||
; out=%1 pPixel1=%2 iStride1=%3 pPixel2=%4 iStride2=%5 wels_shufb0312_movzxw=%6 clobber=%7,%8
|
; out=%1 pPixel1=%2 iStride1=%3 pPixel2=%4 iStride2=%5 wels_shufb0312_movzxw=%6 clobber=%7,%8
|
||||||
%macro AVX2_LoadDiff16P 8
|
%macro AVX2_LoadDiff16P 8
|
||||||
vmovq x%1, [%2 ]
|
vmovq x%1, [%2 ]
|
||||||
@ -1011,3 +1012,5 @@ WELS_EXTERN WelsIDctT4Rec_avx2
|
|||||||
POP_XMM
|
POP_XMM
|
||||||
LOAD_5_PARA_POP
|
LOAD_5_PARA_POP
|
||||||
ret
|
ret
|
||||||
|
%endif
|
||||||
|
|
||||||
|
@ -1504,6 +1504,7 @@ loop_get_satd_16x16_right:
|
|||||||
;
|
;
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
|
|
||||||
|
%ifdef HAVE_AVX2
|
||||||
; out=%1 pSrcA=%2 pSrcB=%3 HSumSubDB1_256=%4 ymm_clobber=%5
|
; out=%1 pSrcA=%2 pSrcB=%3 HSumSubDB1_256=%4 ymm_clobber=%5
|
||||||
%macro AVX2_LoadDiffSatd16x1 5
|
%macro AVX2_LoadDiffSatd16x1 5
|
||||||
vbroadcasti128 %1, [%2]
|
vbroadcasti128 %1, [%2]
|
||||||
@ -1723,6 +1724,8 @@ WelsSampleSatd16x4N_avx2:
|
|||||||
%endif
|
%endif
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
%endif
|
||||||
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
;
|
;
|
||||||
;Pixel_satd_wxh_avx2 END
|
;Pixel_satd_wxh_avx2 END
|
||||||
|
@ -48,8 +48,10 @@ extern "C" {
|
|||||||
#if defined(X86_ASM)
|
#if defined(X86_ASM)
|
||||||
void IdctResAddPred_mmx (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
|
void IdctResAddPred_mmx (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
|
||||||
void IdctResAddPred_sse2 (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
|
void IdctResAddPred_sse2 (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
|
||||||
|
#if defined(HAVE_AVX2)
|
||||||
void IdctResAddPred_avx2 (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
|
void IdctResAddPred_avx2 (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
|
||||||
void IdctFourResAddPred_avx2 (uint8_t* pPred, int32_t iStride, int16_t* pRs, const int8_t* pNzc);
|
void IdctFourResAddPred_avx2 (uint8_t* pPred, int32_t iStride, int16_t* pRs, const int8_t* pNzc);
|
||||||
|
#endif
|
||||||
#endif//X86_ASM
|
#endif//X86_ASM
|
||||||
|
|
||||||
#if defined(HAVE_NEON)
|
#if defined(HAVE_NEON)
|
||||||
|
@ -1005,11 +1005,14 @@ void InitPredFunc (PWelsDecoderContext pCtx, uint32_t uiCpuFlag) {
|
|||||||
pCtx->pGetIChromaPredFunc[C_PRED_DC_T] = WelsDecoderIChromaPredDcTop_sse2;
|
pCtx->pGetIChromaPredFunc[C_PRED_DC_T] = WelsDecoderIChromaPredDcTop_sse2;
|
||||||
pCtx->pGetI4x4LumaPredFunc[I4_PRED_H] = WelsDecoderI4x4LumaPredH_sse2;
|
pCtx->pGetI4x4LumaPredFunc[I4_PRED_H] = WelsDecoderI4x4LumaPredH_sse2;
|
||||||
}
|
}
|
||||||
|
#if defined(HAVE_AVX2)
|
||||||
if (uiCpuFlag & WELS_CPU_AVX2) {
|
if (uiCpuFlag & WELS_CPU_AVX2) {
|
||||||
pCtx->pIdctResAddPredFunc = IdctResAddPred_avx2;
|
pCtx->pIdctResAddPredFunc = IdctResAddPred_avx2;
|
||||||
pCtx->pIdctFourResAddPredFunc = IdctFourResAddPred_avx2;
|
pCtx->pIdctFourResAddPredFunc = IdctFourResAddPred_avx2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
//reset decoder number related statistics info
|
//reset decoder number related statistics info
|
||||||
|
@ -370,6 +370,7 @@ WELS_EXTERN WelsDequantIHadamard4x4_sse2
|
|||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
%ifdef HAVE_AVX2
|
||||||
; data=%1 abs_out=%2 ff=%3 mf=%4 7FFFh=%5
|
; data=%1 abs_out=%2 ff=%3 mf=%4 7FFFh=%5
|
||||||
%macro AVX2_Quant 5
|
%macro AVX2_Quant 5
|
||||||
vpabsw %2, %1
|
vpabsw %2, %1
|
||||||
@ -502,3 +503,5 @@ WELS_EXTERN WelsQuantFour4x4Max_avx2
|
|||||||
POP_XMM
|
POP_XMM
|
||||||
LOAD_4_PARA_POP
|
LOAD_4_PARA_POP
|
||||||
ret
|
ret
|
||||||
|
%endif
|
||||||
|
|
||||||
|
@ -107,10 +107,12 @@ void CDownsampling::InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc, int
|
|||||||
sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_sse4;
|
sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_sse4;
|
||||||
sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse41;
|
sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse41;
|
||||||
}
|
}
|
||||||
|
#ifdef HAVE_AVX2
|
||||||
if (iCpuFlag & WELS_CPU_AVX2) {
|
if (iCpuFlag & WELS_CPU_AVX2) {
|
||||||
sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_avx2;
|
sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_avx2;
|
||||||
sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_avx2;
|
sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_avx2;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
#endif//X86_ASM
|
#endif//X86_ASM
|
||||||
|
|
||||||
#if defined(HAVE_NEON)
|
#if defined(HAVE_NEON)
|
||||||
|
@ -99,8 +99,10 @@ GeneralDownsampleFunc GeneralBilinearFastDownsamplerWrap_sse2;
|
|||||||
GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_sse2;
|
GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_sse2;
|
||||||
GeneralDownsampleFunc GeneralBilinearFastDownsamplerWrap_ssse3;
|
GeneralDownsampleFunc GeneralBilinearFastDownsamplerWrap_ssse3;
|
||||||
GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_sse41;
|
GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_sse41;
|
||||||
|
#ifdef HAVE_AVX2
|
||||||
GeneralDownsampleFunc GeneralBilinearFastDownsamplerWrap_avx2;
|
GeneralDownsampleFunc GeneralBilinearFastDownsamplerWrap_avx2;
|
||||||
GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_avx2;
|
GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_avx2;
|
||||||
|
#endif
|
||||||
|
|
||||||
SpecificDownsampleFunc DyadicBilinearOneThirdDownsampler_ssse3;
|
SpecificDownsampleFunc DyadicBilinearOneThirdDownsampler_ssse3;
|
||||||
SpecificDownsampleFunc DyadicBilinearOneThirdDownsampler_sse4;
|
SpecificDownsampleFunc DyadicBilinearOneThirdDownsampler_sse4;
|
||||||
@ -120,12 +122,14 @@ void GeneralBilinearFastDownsampler_ssse3 (uint8_t* pDst, int32_t iDstStride, in
|
|||||||
void GeneralBilinearAccurateDownsampler_sse41 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth,
|
void GeneralBilinearAccurateDownsampler_sse41 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth,
|
||||||
int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX,
|
int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX,
|
||||||
uint32_t uiScaleY);
|
uint32_t uiScaleY);
|
||||||
|
#ifdef HAVE_AVX2
|
||||||
void GeneralBilinearFastDownsampler_avx2 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth,
|
void GeneralBilinearFastDownsampler_avx2 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth,
|
||||||
int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX,
|
int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX,
|
||||||
uint32_t uiScaleY);
|
uint32_t uiScaleY);
|
||||||
void GeneralBilinearAccurateDownsampler_avx2 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth,
|
void GeneralBilinearAccurateDownsampler_avx2 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth,
|
||||||
int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX,
|
int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX,
|
||||||
uint32_t uiScaleY);
|
uint32_t uiScaleY);
|
||||||
|
#endif
|
||||||
|
|
||||||
WELSVP_EXTERN_C_END
|
WELSVP_EXTERN_C_END
|
||||||
#endif
|
#endif
|
||||||
|
@ -284,8 +284,10 @@ DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (sse2)
|
|||||||
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse2)
|
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse2)
|
||||||
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (ssse3)
|
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (ssse3)
|
||||||
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse41)
|
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse41)
|
||||||
|
#ifdef HAVE_AVX2
|
||||||
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (avx2)
|
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (avx2)
|
||||||
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (avx2)
|
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (avx2)
|
||||||
|
#endif
|
||||||
#endif //X86_ASM
|
#endif //X86_ASM
|
||||||
|
|
||||||
#ifdef HAVE_NEON
|
#ifdef HAVE_NEON
|
||||||
|
@ -3254,6 +3254,7 @@ WELS_EXTERN GeneralBilinearAccurateDownsampler_sse41
|
|||||||
%undef xmm_xfrac1_begin
|
%undef xmm_xfrac1_begin
|
||||||
%undef xmm_xfrac_inc
|
%undef xmm_xfrac_inc
|
||||||
|
|
||||||
|
%ifdef HAVE_AVX2
|
||||||
; xpos_int=%1 xpos_frac=%2 inc_int+1=%3 inc_frac=%4 tmp=%5
|
; xpos_int=%1 xpos_frac=%2 inc_int+1=%3 inc_frac=%4 tmp=%5
|
||||||
%macro AVX2_BilinearIncXposuw 5
|
%macro AVX2_BilinearIncXposuw 5
|
||||||
vpaddusw %5, %2, %4
|
vpaddusw %5, %2, %4
|
||||||
@ -4552,3 +4553,5 @@ WELS_EXTERN GeneralBilinearAccurateDownsampler_avx2
|
|||||||
%undef ymm_xfrac0_begin
|
%undef ymm_xfrac0_begin
|
||||||
%undef ymm_xfrac1_begin
|
%undef ymm_xfrac1_begin
|
||||||
%undef ymm_xfrac_inc
|
%undef ymm_xfrac_inc
|
||||||
|
%endif
|
||||||
|
|
||||||
|
@ -2088,6 +2088,7 @@ sqdiff_bgd_width_loop:
|
|||||||
%assign push_num push_num - stack_alloc_num
|
%assign push_num push_num - stack_alloc_num
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
%ifdef HAVE_AVX2
|
||||||
; Max unsigned byte per quadword
|
; Max unsigned byte per quadword
|
||||||
; out=%1 in=%2 tmp=%3
|
; out=%1 in=%2 tmp=%3
|
||||||
%macro AVX2_Maxubq 3
|
%macro AVX2_Maxubq 3
|
||||||
@ -3557,3 +3558,6 @@ WELS_EXTERN VAACalcSadSsdBgd_avx2
|
|||||||
%undef p_sd8x8
|
%undef p_sd8x8
|
||||||
%undef p_mad8x8
|
%undef p_mad8x8
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
%endif
|
||||||
|
|
||||||
|
@ -53,6 +53,7 @@ void SetNonZeroCount_ref (int8_t* pNonZeroCount) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(X86_ASM)
|
#if defined(X86_ASM)
|
||||||
|
#if defined(HAVE_AVX2)
|
||||||
void IdctFourResAddPred_ref (uint8_t* pPred, int32_t iStride, int16_t* pRs) {
|
void IdctFourResAddPred_ref (uint8_t* pPred, int32_t iStride, int16_t* pRs) {
|
||||||
IdctResAddPred_ref (pPred + 0 * iStride + 0, iStride, pRs + 0 * 16);
|
IdctResAddPred_ref (pPred + 0 * iStride + 0, iStride, pRs + 0 * 16);
|
||||||
IdctResAddPred_ref (pPred + 0 * iStride + 4, iStride, pRs + 1 * 16);
|
IdctResAddPred_ref (pPred + 0 * iStride + 4, iStride, pRs + 1 * 16);
|
||||||
@ -60,6 +61,7 @@ void IdctFourResAddPred_ref (uint8_t* pPred, int32_t iStride, int16_t* pRs) {
|
|||||||
IdctResAddPred_ref (pPred + 4 * iStride + 4, iStride, pRs + 3 * 16);
|
IdctResAddPred_ref (pPred + 4 * iStride + 4, iStride, pRs + 3 * 16);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
} // anon ns
|
} // anon ns
|
||||||
|
|
||||||
@ -138,9 +140,11 @@ GENERATE_IDCTRESADDPRED (IdctResAddPred_c, 0)
|
|||||||
#if defined(X86_ASM)
|
#if defined(X86_ASM)
|
||||||
GENERATE_IDCTRESADDPRED (IdctResAddPred_mmx, WELS_CPU_MMXEXT)
|
GENERATE_IDCTRESADDPRED (IdctResAddPred_mmx, WELS_CPU_MMXEXT)
|
||||||
GENERATE_IDCTRESADDPRED (IdctResAddPred_sse2, WELS_CPU_SSE2)
|
GENERATE_IDCTRESADDPRED (IdctResAddPred_sse2, WELS_CPU_SSE2)
|
||||||
|
#if defined(HAVE_AVX2)
|
||||||
GENERATE_IDCTRESADDPRED (IdctResAddPred_avx2, WELS_CPU_AVX2)
|
GENERATE_IDCTRESADDPRED (IdctResAddPred_avx2, WELS_CPU_AVX2)
|
||||||
GENERATE_IDCTFOURRESADDPRED (IdctFourResAddPred_avx2, WELS_CPU_AVX2)
|
GENERATE_IDCTFOURRESADDPRED (IdctFourResAddPred_avx2, WELS_CPU_AVX2)
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_NEON)
|
#if defined(HAVE_NEON)
|
||||||
GENERATE_IDCTRESADDPRED (IdctResAddPred_neon, WELS_CPU_NEON)
|
GENERATE_IDCTRESADDPRED (IdctResAddPred_neon, WELS_CPU_NEON)
|
||||||
|
@ -372,12 +372,15 @@ GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearFastDownsamplerWrap_ssse3
|
|||||||
WELS_CPU_SSSE3)
|
WELS_CPU_SSSE3)
|
||||||
GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsamplerWrap_sse41,
|
GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsamplerWrap_sse41,
|
||||||
GeneralBilinearAccurateDownsampler_ref, 1, WELS_CPU_SSE41)
|
GeneralBilinearAccurateDownsampler_ref, 1, WELS_CPU_SSE41)
|
||||||
|
#ifdef HAVE_AVX2
|
||||||
GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearFastDownsamplerWrap_avx2, GeneralBilinearFastDownsampler_ref, 1,
|
GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearFastDownsamplerWrap_avx2, GeneralBilinearFastDownsampler_ref, 1,
|
||||||
WELS_CPU_AVX2)
|
WELS_CPU_AVX2)
|
||||||
GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsamplerWrap_avx2,
|
GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsamplerWrap_avx2,
|
||||||
GeneralBilinearAccurateDownsampler_ref, 1, WELS_CPU_AVX2)
|
GeneralBilinearAccurateDownsampler_ref, 1, WELS_CPU_AVX2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_NEON)
|
#if defined(HAVE_NEON)
|
||||||
GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsamplerWidthx32_neon, 1, WELS_CPU_NEON)
|
GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsamplerWidthx32_neon, 1, WELS_CPU_NEON)
|
||||||
GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsampler_neon, 1, WELS_CPU_NEON)
|
GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsampler_neon, 1, WELS_CPU_NEON)
|
||||||
|
Loading…
Reference in New Issue
Block a user