Change pfSampleAveraging to be a single function with internal width handling
This makes it match the behaviour of pMcLumaFunc and pMcChromaFunc.
This commit is contained in:
parent
1127aa7761
commit
9a9fc4c489
@ -81,7 +81,7 @@ typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride,
|
||||
int32_t iWidth, int32_t iHeight);
|
||||
typedef void (*PWelsLumaQuarpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight);
|
||||
typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t);
|
||||
typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t, int32_t);
|
||||
|
||||
typedef struct TagMcFunc {
|
||||
PWelsLumaHalfpelMcFunc pfLumaHalfpelHor;
|
||||
@ -90,7 +90,7 @@ typedef struct TagMcFunc {
|
||||
PWelsMcFunc pMcChromaFunc;
|
||||
|
||||
PWelsMcFunc pMcLumaFunc;
|
||||
PWelsSampleAveragingFunc pfSampleAveraging[2];
|
||||
PWelsSampleAveragingFunc pfSampleAveraging;
|
||||
} SMcFunc;
|
||||
|
||||
typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc);
|
||||
|
@ -41,6 +41,9 @@
|
||||
#include "mc.h"
|
||||
#include "cpu_core.h"
|
||||
|
||||
// Pointer type for the fixed-width averaging routines stored in the kpfFuncs
// tables of the PixelAvg_* wrappers in this file. It takes seven arguments,
// one int32_t fewer than the eight-argument PWelsSampleAveragingFunc; the
// wrappers invoke it as (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB,
// iSrcBStride, iHeight), i.e. without a width argument.
typedef void (*PWelsSampleWidthAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*,
|
||||
int32_t, int32_t);
|
||||
|
||||
namespace WelsEnc {
|
||||
/*------------------weight for chroma fraction pixel interpolation------------------*/
|
||||
//kuiA = (8 - dx) * (8 - dy);
|
||||
@ -406,6 +409,14 @@ void McLuma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t i
|
||||
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
|
||||
pWelsMcFuncWidthEq16[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
}
|
||||
// Width-dispatching front end for the _c sample averaging routines.
// Picks PixelAvgWidthEq8_c or PixelAvgWidthEq16_c based on iWidth and forwards
// every other argument unchanged; iWidth itself is not passed to the callee.
void PixelAvg_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
|
||||
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
|
||||
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
|
||||
PixelAvgWidthEq8_c,
|
||||
PixelAvgWidthEq16_c
|
||||
};
|
||||
// iWidth >> 4 is 0 for widths 0..15 and 1 for widths 16..31. The index is
// not range-checked against the two-entry table, so iWidth must stay in 0..31.
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
|
||||
}
|
||||
//***************************************************************************//
|
||||
// MMXEXT and SSE2 implementation //
|
||||
//***************************************************************************//
|
||||
@ -597,6 +608,14 @@ void McLuma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_
|
||||
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
|
||||
pWelsMcFuncWidthEq16_sse2[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
}
|
||||
// Width-dispatching front end for the x86 sample averaging routines.
// Picks PixelAvgWidthEq8_mmx (note: the 8-wide entry is the _mmx routine) or
// PixelAvgWidthEq16_sse2 based on iWidth and forwards every other argument
// unchanged; iWidth itself is not passed to the callee.
void PixelAvg_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
|
||||
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
|
||||
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
|
||||
PixelAvgWidthEq8_mmx,
|
||||
PixelAvgWidthEq16_sse2
|
||||
};
|
||||
// iWidth >> 4 is 0 for widths 0..15 and 1 for widths 16..31. The index is
// not range-checked against the two-entry table, so iWidth must stay in 0..31.
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
|
||||
}
|
||||
#endif //X86_ASM
|
||||
|
||||
//***************************************************************************//
|
||||
@ -699,6 +718,14 @@ void EncMcLuma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int
|
||||
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
|
||||
pWelsMcFuncWidthEq16_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
}
|
||||
// Width-dispatching front end for the _neon sample averaging routines.
// Picks PixStrideAvgWidthEq8_neon or PixStrideAvgWidthEq16_neon based on
// iWidth and forwards every other argument unchanged; iWidth itself is not
// passed to the callee.
void PixelAvg_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
|
||||
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
|
||||
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
|
||||
PixStrideAvgWidthEq8_neon,
|
||||
PixStrideAvgWidthEq16_neon
|
||||
};
|
||||
// iWidth >> 4 is 0 for widths 0..15 and 1 for widths 16..31. The index is
// not range-checked against the two-entry table, so iWidth must stay in 0..31.
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_NEON_AARCH64)
|
||||
@ -807,15 +834,21 @@ void EncMcLuma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* p
|
||||
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
|
||||
pWelsMcFuncWidthEq16_AArch64_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
}
|
||||
// Width-dispatching front end for the _AArch64_neon sample averaging routines.
// Picks PixStrideAvgWidthEq8_AArch64_neon or PixStrideAvgWidthEq16_AArch64_neon
// based on iWidth and forwards every other argument unchanged; iWidth itself
// is not passed to the callee.
void PixelAvg_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
|
||||
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
|
||||
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
|
||||
PixStrideAvgWidthEq8_AArch64_neon,
|
||||
PixStrideAvgWidthEq16_AArch64_neon
|
||||
};
|
||||
// iWidth >> 4 is 0 for widths 0..15 and 1 for widths 16..31. The index is
// not range-checked against the two-entry table, so iWidth must stay in 0..31.
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
|
||||
}
|
||||
#endif
|
||||
|
||||
void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
|
||||
static const PWelsSampleAveragingFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
|
||||
|
||||
pMcFuncs->pfLumaHalfpelHor = McHorVer20_c;
|
||||
pMcFuncs->pfLumaHalfpelVer = McHorVer02_c;
|
||||
pMcFuncs->pfLumaHalfpelCen = McHorVer22_c;
|
||||
memcpy (pMcFuncs->pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc));
|
||||
pMcFuncs->pfSampleAveraging = PixelAvg_c;
|
||||
pMcFuncs->pMcChromaFunc = McChroma_c;
|
||||
pMcFuncs->pMcLumaFunc = McLuma_c;
|
||||
#if defined (X86_ASM)
|
||||
@ -823,8 +856,7 @@ void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
|
||||
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
|
||||
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2;
|
||||
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2;
|
||||
pMcFuncs->pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
|
||||
pMcFuncs->pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
|
||||
pMcFuncs->pfSampleAveraging = PixelAvg_sse2;
|
||||
pMcFuncs->pMcChromaFunc = McChroma_sse2;
|
||||
pMcFuncs->pMcLumaFunc = McLuma_sse2;
|
||||
}
|
||||
@ -839,8 +871,7 @@ void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
|
||||
if (uiCpuFlag & WELS_CPU_NEON) {
|
||||
pMcFuncs->pMcLumaFunc = EncMcLuma_neon;
|
||||
pMcFuncs->pMcChromaFunc = EncMcChroma_neon;
|
||||
pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon;
|
||||
pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon;
|
||||
pMcFuncs->pfSampleAveraging = PixelAvg_neon;
|
||||
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16
|
||||
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16
|
||||
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1
|
||||
@ -850,8 +881,7 @@ void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
|
||||
if (uiCpuFlag & WELS_CPU_NEON) {
|
||||
pMcFuncs->pMcLumaFunc = EncMcLuma_AArch64_neon;
|
||||
pMcFuncs->pMcChromaFunc = EncMcChroma_AArch64_neon;
|
||||
pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon;
|
||||
pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon;
|
||||
pMcFuncs->pfSampleAveraging = PixelAvg_AArch64_neon;
|
||||
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16
|
||||
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16
|
||||
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1
|
||||
|
@ -531,15 +531,14 @@ typedef struct TagQuarParams {
|
||||
|
||||
inline void MeRefineQuarPixel (SWelsFuncPtrList* pFunc, SWelsME* pMe, SMeRefinePointer* pMeRefine,
|
||||
const int32_t kiWidth, const int32_t kiHeight, SQuarRefineParams* pParams, int32_t iStrideEnc) {
|
||||
PWelsSampleAveragingFunc* pSampleAvg = pFunc->sMcFuncs.pfSampleAveraging;
|
||||
const int32_t kiAvgIndex = kiWidth >> 4;
|
||||
PWelsSampleAveragingFunc pSampleAvg = pFunc->sMcFuncs.pfSampleAveraging;
|
||||
int32_t iCurCost;
|
||||
uint8_t* pEncMb = pMe->pEncMb;
|
||||
uint8_t* pTmp = NULL;
|
||||
const uint8_t kuiPixel = pMe->uiBlockSize;
|
||||
|
||||
pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[0], ME_REFINE_BUF_STRIDE,
|
||||
pParams->pSrcB[0], pParams->iStrideA, kiHeight);
|
||||
pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[0], ME_REFINE_BUF_STRIDE,
|
||||
pParams->pSrcB[0], pParams->iStrideA, kiWidth, kiHeight);
|
||||
|
||||
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[0]);
|
||||
if (iCurCost < pParams->iBestCost) {
|
||||
@ -547,24 +546,24 @@ inline void MeRefineQuarPixel (SWelsFuncPtrList* pFunc, SWelsME* pMe, SMeRefineP
|
||||
SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
|
||||
}
|
||||
//=========================(0, 1)=======================//
|
||||
pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[1],
|
||||
ME_REFINE_BUF_STRIDE, pParams->pSrcB[1], pParams->iStrideA, kiHeight);
|
||||
pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[1],
|
||||
ME_REFINE_BUF_STRIDE, pParams->pSrcB[1], pParams->iStrideA, kiWidth, kiHeight);
|
||||
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[1]);
|
||||
if (iCurCost < pParams->iBestCost) {
|
||||
pParams->iBestQuarPix = ME_QUAR_PIXEL_BOTTOM;
|
||||
SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
|
||||
}
|
||||
//==========================(-1, 0)=========================//
|
||||
pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[2],
|
||||
ME_REFINE_BUF_STRIDE, pParams->pSrcB[2], pParams->iStrideB, kiHeight);
|
||||
pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[2],
|
||||
ME_REFINE_BUF_STRIDE, pParams->pSrcB[2], pParams->iStrideB, kiWidth, kiHeight);
|
||||
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[2]);
|
||||
if (iCurCost < pParams->iBestCost) {
|
||||
pParams->iBestQuarPix = ME_QUAR_PIXEL_LEFT;
|
||||
SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
|
||||
}
|
||||
//==========================(1, 0)=========================//
|
||||
pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[3],
|
||||
ME_REFINE_BUF_STRIDE, pParams->pSrcB[3], pParams->iStrideB, kiHeight);
|
||||
pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[3],
|
||||
ME_REFINE_BUF_STRIDE, pParams->pSrcB[3], pParams->iStrideB, kiWidth, kiHeight);
|
||||
|
||||
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[3]);
|
||||
if (iCurCost < pParams->iBestCost) {
|
||||
|
@ -40,8 +40,8 @@ TEST (EncMcAvg, PixelAvg) {
|
||||
}
|
||||
PixelAvgAnchor (uDstAnchor[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0], MC_BUFF_SRC_STRIDE, width,
|
||||
height);
|
||||
sMcFunc.pfSampleAveraging[w] (uDstTest[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0],
|
||||
MC_BUFF_SRC_STRIDE, height);
|
||||
sMcFunc.pfSampleAveraging (uDstTest[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0],
|
||||
MC_BUFF_SRC_STRIDE, width, height);
|
||||
for (int32_t j = 0; j < height; j++) {
|
||||
for (int32_t i = 0; i < width; i++) {
|
||||
ASSERT_EQ (uDstAnchor[j][i], uDstTest[j][i]);
|
||||
|
Loading…
x
Reference in New Issue
Block a user