Add x86 64bit asm code for downsample
This commit is contained in:
parent
ca61e286b4
commit
ae12fbde1c
@ -57,21 +57,21 @@ void CDownsampling::InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc, int
|
||||
sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsampler_c;
|
||||
#if defined(X86_ASM)
|
||||
if (iCpuFlag & WELS_CPU_SSE) {
|
||||
/* sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_sse;
|
||||
sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_sse;
|
||||
sDownsampleFunc.pfHalfAverage[2] = DyadicBilinearDownsamplerWidthx8_sse;*/
|
||||
sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_sse;
|
||||
sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_sse;
|
||||
sDownsampleFunc.pfHalfAverage[2] = DyadicBilinearDownsamplerWidthx8_sse;
|
||||
}
|
||||
if (iCpuFlag & WELS_CPU_SSE2) {
|
||||
// sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse2;
|
||||
// sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_sse2;
|
||||
sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse2;
|
||||
sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_sse2;
|
||||
}
|
||||
if (iCpuFlag & WELS_CPU_SSSE3) {
|
||||
// sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_ssse3;
|
||||
// sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_ssse3;
|
||||
sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_ssse3;
|
||||
sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_ssse3;
|
||||
}
|
||||
if (iCpuFlag & WELS_CPU_SSE41) {
|
||||
// sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_sse4;
|
||||
// sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_sse4;
|
||||
sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsamplerWidthx32_sse4;
|
||||
sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsamplerWidthx16_sse4;
|
||||
}
|
||||
#endif//X86_ASM
|
||||
|
||||
|
@ -202,31 +202,31 @@ void GeneralBilinearAccurateDownsampler_c (uint8_t* pDst, const int32_t kiDstStr
|
||||
|
||||
|
||||
#ifdef X86_ASM
|
||||
//void GeneralBilinearFastDownsamplerWrap_sse2 (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
|
||||
// const int32_t kiDstHeight,
|
||||
// uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
|
||||
// const int32_t kiScaleBitWidth = 16, kiScaleBitHeight = 15;
|
||||
// const uint32_t kuiScaleWidth = (1 << kiScaleBitWidth), kuiScaleHeight = (1 << kiScaleBitHeight);
|
||||
//
|
||||
// uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
|
||||
// uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
|
||||
//
|
||||
// GeneralBilinearFastDownsampler_sse2 (pDst, kiDstStride, kiDstWidth, kiDstHeight,
|
||||
// pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, uiScalex, uiScaley);
|
||||
//}
|
||||
//
|
||||
//void GeneralBilinearAccurateDownsamplerWrap_sse2 (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
|
||||
// const int32_t kiDstHeight,
|
||||
// uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
|
||||
// const int32_t kiScaleBit = 15;
|
||||
// const uint32_t kuiScale = (1 << kiScaleBit);
|
||||
//
|
||||
// uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScale);
|
||||
// uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScale);
|
||||
//
|
||||
// GeneralBilinearAccurateDownsampler_sse2 (pDst, kiDstStride, kiDstWidth, kiDstHeight,
|
||||
// pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, uiScalex, uiScaley);
|
||||
//}
|
||||
void GeneralBilinearFastDownsamplerWrap_sse2 (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
|
||||
const int32_t kiDstHeight,
|
||||
uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
|
||||
const int32_t kiScaleBitWidth = 16, kiScaleBitHeight = 15;
|
||||
const uint32_t kuiScaleWidth = (1 << kiScaleBitWidth), kuiScaleHeight = (1 << kiScaleBitHeight);
|
||||
|
||||
uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
|
||||
uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
|
||||
|
||||
GeneralBilinearFastDownsampler_sse2 (pDst, kiDstStride, kiDstWidth, kiDstHeight,
|
||||
pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, uiScalex, uiScaley);
|
||||
}
|
||||
|
||||
void GeneralBilinearAccurateDownsamplerWrap_sse2 (uint8_t* pDst, const int32_t kiDstStride, const int32_t kiDstWidth,
|
||||
const int32_t kiDstHeight,
|
||||
uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
|
||||
const int32_t kiScaleBit = 15;
|
||||
const uint32_t kuiScale = (1 << kiScaleBit);
|
||||
|
||||
uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScale);
|
||||
uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScale);
|
||||
|
||||
GeneralBilinearAccurateDownsampler_sse2 (pDst, kiDstStride, kiDstWidth, kiDstHeight,
|
||||
pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, uiScalex, uiScaley);
|
||||
}
|
||||
#endif //X86_ASM
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user