Add x86 64bit asm code for downsample
This commit is contained in:
parent
ca61e286b4
commit
ae12fbde1c
@ -57,21 +57,21 @@ void CDownsampling::InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc, int
|
|||||||
sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsampler_c;
|
sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsampler_c;
|
||||||
#if defined(X86_ASM)
|
#if defined(X86_ASM)
|
||||||
if (iCpuFlag & WELS_CPU_SSE) {
|
if (iCpuFlag & WELS_CPU_SSE) {
|
||||||
/* sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_sse;
|
sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_sse;
|
||||||
sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_sse;
|
sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_sse;
|
||||||
sDownsampleFunc.pfHalfAverage[2] = DyadicBilinearDownsamplerWidthx8_sse;*/
|
sDownsampleFunc.pfHalfAverage[2] = DyadicBilinearDownsamplerWidthx8_sse;
|
||||||
}
|
}
|
||||||
if (iCpuFlag & WELS_CPU_SSE2) {
|
if (iCpuFlag & WELS_CPU_SSE2) {
|
||||||
// sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse2;
|
sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse2;
|
||||||
// sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_sse2;
|
sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_sse2;
|
||||||
}
|
}
|
||||||
if (iCpuFlag & WELS_CPU_SSSE3) {
|
if (iCpuFlag & WELS_CPU_SSSE3) {
|
||||||
// sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_ssse3;
|
sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_ssse3;
|
||||||
// sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_ssse3;
|
sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_ssse3;
|
||||||
}
|
}
|
||||||
if (iCpuFlag & WELS_CPU_SSE41) {
|
if (iCpuFlag & WELS_CPU_SSE41) {
|
||||||
// sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_sse4;
|
sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_sse4;
|
||||||
// sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_sse4;
|
sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_sse4;
|
||||||
}
|
}
|
||||||
#endif//X86_ASM
|
#endif//X86_ASM
|
||||||
|
|
||||||
|
@ -202,31 +202,31 @@ void GeneralBilinearAccurateDownsampler_c (uint8_t* pDst, const int32_t kiDstStr
|
|||||||
|
|
||||||
|
|
||||||
#ifdef X86_ASM
|
#ifdef X86_ASM
|
||||||
//void GeneralBilinearFastDownsamplerWrap_sse2 (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
|
void GeneralBilinearFastDownsamplerWrap_sse2 (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
|
||||||
// const int32_t kiDstHeight,
|
const int32_t kiDstHeight,
|
||||||
// uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
|
uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
|
||||||
// const int32_t kiScaleBitWidth = 16, kiScaleBitHeight = 15;
|
const int32_t kiScaleBitWidth = 16, kiScaleBitHeight = 15;
|
||||||
// const uint32_t kuiScaleWidth = (1 << kiScaleBitWidth), kuiScaleHeight = (1 << kiScaleBitHeight);
|
const uint32_t kuiScaleWidth = (1 << kiScaleBitWidth), kuiScaleHeight = (1 << kiScaleBitHeight);
|
||||||
//
|
|
||||||
// uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
|
uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
|
||||||
// uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
|
uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
|
||||||
//
|
|
||||||
// GeneralBilinearFastDownsampler_sse2 (pDst, kiDstStride, kiDstWidth, kiDstHeight,
|
GeneralBilinearFastDownsampler_sse2 (pDst, kiDstStride, kiDstWidth, kiDstHeight,
|
||||||
// pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, uiScalex, uiScaley);
|
pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, uiScalex, uiScaley);
|
||||||
//}
|
}
|
||||||
//
|
|
||||||
//void GeneralBilinearAccurateDownsamplerWrap_sse2 (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
|
void GeneralBilinearAccurateDownsamplerWrap_sse2 (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
|
||||||
// const int32_t kiDstHeight,
|
const int32_t kiDstHeight,
|
||||||
// uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
|
uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
|
||||||
// const int32_t kiScaleBit = 15;
|
const int32_t kiScaleBit = 15;
|
||||||
// const uint32_t kuiScale = (1 << kiScaleBit);
|
const uint32_t kuiScale = (1 << kiScaleBit);
|
||||||
//
|
|
||||||
// uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScale);
|
uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScale);
|
||||||
// uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScale);
|
uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScale);
|
||||||
//
|
|
||||||
// GeneralBilinearAccurateDownsampler_sse2 (pDst, kiDstStride, kiDstWidth, kiDstHeight,
|
GeneralBilinearAccurateDownsampler_sse2 (pDst, kiDstStride, kiDstWidth, kiDstHeight,
|
||||||
// pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, uiScalex, uiScaley);
|
pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, uiScalex, uiScaley);
|
||||||
//}
|
}
|
||||||
#endif //X86_ASM
|
#endif //X86_ASM
|
||||||
|
|
||||||
#ifdef HAVE_NEON
|
#ifdef HAVE_NEON
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user