Change pfSampleAveraging to be a single function with internal width handling

This makes it match the behaviour of pMcLumaFunc and pMcChromaFunc.
This commit is contained in:
Martin Storsjö 2015-01-28 12:26:17 +02:00
parent 1127aa7761
commit 9a9fc4c489
4 changed files with 52 additions and 23 deletions

View File

@ -81,7 +81,7 @@ typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride,
int32_t iWidth, int32_t iHeight);
typedef void (*PWelsLumaQuarpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t);
typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t, int32_t);
typedef struct TagMcFunc {
PWelsLumaHalfpelMcFunc pfLumaHalfpelHor;
@ -90,7 +90,7 @@ typedef struct TagMcFunc {
PWelsMcFunc pMcChromaFunc;
PWelsMcFunc pMcLumaFunc;
PWelsSampleAveragingFunc pfSampleAveraging[2];
PWelsSampleAveragingFunc pfSampleAveraging;
} SMcFunc;
typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc);

View File

@ -41,6 +41,9 @@
#include "mc.h"
#include "cpu_core.h"
// Pointer type for the fixed-width averaging routines that the PixelAvg_*
// wrappers in this file dispatch to. Unlike the wrappers themselves, these
// routines take no width argument. As called by the wrappers, the arguments
// are, in order: pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride,
// iHeight.
typedef void (*PWelsSampleWidthAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*,
int32_t, int32_t);
namespace WelsEnc {
/*------------------weight for chroma fraction pixel interpolation------------------*/
//kuiA = (8 - dx) * (8 - dy);
@ -406,6 +409,14 @@ void McLuma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t i
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
pWelsMcFuncWidthEq16[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
void PixelAvg_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
PixelAvgWidthEq8_c,
PixelAvgWidthEq16_c
};
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
}
//***************************************************************************//
// MMXEXT and SSE2 implementation //
//***************************************************************************//
@ -597,6 +608,14 @@ void McLuma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
pWelsMcFuncWidthEq16_sse2[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
void PixelAvg_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
PixelAvgWidthEq8_mmx,
PixelAvgWidthEq16_sse2
};
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
}
#endif //X86_ASM
//***************************************************************************//
@ -699,6 +718,14 @@ void EncMcLuma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
pWelsMcFuncWidthEq16_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
void PixelAvg_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
PixStrideAvgWidthEq8_neon,
PixStrideAvgWidthEq16_neon
};
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
}
#endif
#if defined(HAVE_NEON_AARCH64)
@ -807,15 +834,21 @@ void EncMcLuma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* p
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
pWelsMcFuncWidthEq16_AArch64_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
void PixelAvg_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
PixStrideAvgWidthEq8_AArch64_neon,
PixStrideAvgWidthEq16_AArch64_neon
};
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
}
#endif
void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
static const PWelsSampleAveragingFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
pMcFuncs->pfLumaHalfpelHor = McHorVer20_c;
pMcFuncs->pfLumaHalfpelVer = McHorVer02_c;
pMcFuncs->pfLumaHalfpelCen = McHorVer22_c;
memcpy (pMcFuncs->pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc));
pMcFuncs->pfSampleAveraging = PixelAvg_c;
pMcFuncs->pMcChromaFunc = McChroma_c;
pMcFuncs->pMcLumaFunc = McLuma_c;
#if defined (X86_ASM)
@ -823,8 +856,7 @@ void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2;
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2;
pMcFuncs->pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
pMcFuncs->pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
pMcFuncs->pfSampleAveraging = PixelAvg_sse2;
pMcFuncs->pMcChromaFunc = McChroma_sse2;
pMcFuncs->pMcLumaFunc = McLuma_sse2;
}
@ -839,8 +871,7 @@ void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
if (uiCpuFlag & WELS_CPU_NEON) {
pMcFuncs->pMcLumaFunc = EncMcLuma_neon;
pMcFuncs->pMcChromaFunc = EncMcChroma_neon;
pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon;
pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon;
pMcFuncs->pfSampleAveraging = PixelAvg_neon;
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1
@ -850,8 +881,7 @@ void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
if (uiCpuFlag & WELS_CPU_NEON) {
pMcFuncs->pMcLumaFunc = EncMcLuma_AArch64_neon;
pMcFuncs->pMcChromaFunc = EncMcChroma_AArch64_neon;
pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon;
pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon;
pMcFuncs->pfSampleAveraging = PixelAvg_AArch64_neon;
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1

View File

@ -531,15 +531,14 @@ typedef struct TagQuarParams {
inline void MeRefineQuarPixel (SWelsFuncPtrList* pFunc, SWelsME* pMe, SMeRefinePointer* pMeRefine,
const int32_t kiWidth, const int32_t kiHeight, SQuarRefineParams* pParams, int32_t iStrideEnc) {
PWelsSampleAveragingFunc* pSampleAvg = pFunc->sMcFuncs.pfSampleAveraging;
const int32_t kiAvgIndex = kiWidth >> 4;
PWelsSampleAveragingFunc pSampleAvg = pFunc->sMcFuncs.pfSampleAveraging;
int32_t iCurCost;
uint8_t* pEncMb = pMe->pEncMb;
uint8_t* pTmp = NULL;
const uint8_t kuiPixel = pMe->uiBlockSize;
pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[0], ME_REFINE_BUF_STRIDE,
pParams->pSrcB[0], pParams->iStrideA, kiHeight);
pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[0], ME_REFINE_BUF_STRIDE,
pParams->pSrcB[0], pParams->iStrideA, kiWidth, kiHeight);
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[0]);
if (iCurCost < pParams->iBestCost) {
@ -547,24 +546,24 @@ inline void MeRefineQuarPixel (SWelsFuncPtrList* pFunc, SWelsME* pMe, SMeRefineP
SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
}
//=========================(0, 1)=======================//
pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[1],
ME_REFINE_BUF_STRIDE, pParams->pSrcB[1], pParams->iStrideA, kiHeight);
pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[1],
ME_REFINE_BUF_STRIDE, pParams->pSrcB[1], pParams->iStrideA, kiWidth, kiHeight);
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[1]);
if (iCurCost < pParams->iBestCost) {
pParams->iBestQuarPix = ME_QUAR_PIXEL_BOTTOM;
SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
}
//==========================(-1, 0)=========================//
pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[2],
ME_REFINE_BUF_STRIDE, pParams->pSrcB[2], pParams->iStrideB, kiHeight);
pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[2],
ME_REFINE_BUF_STRIDE, pParams->pSrcB[2], pParams->iStrideB, kiWidth, kiHeight);
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[2]);
if (iCurCost < pParams->iBestCost) {
pParams->iBestQuarPix = ME_QUAR_PIXEL_LEFT;
SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
}
//==========================(1, 0)=========================//
pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[3],
ME_REFINE_BUF_STRIDE, pParams->pSrcB[3], pParams->iStrideB, kiHeight);
pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[3],
ME_REFINE_BUF_STRIDE, pParams->pSrcB[3], pParams->iStrideB, kiWidth, kiHeight);
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[3]);
if (iCurCost < pParams->iBestCost) {

View File

@ -40,8 +40,8 @@ TEST (EncMcAvg, PixelAvg) {
}
PixelAvgAnchor (uDstAnchor[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0], MC_BUFF_SRC_STRIDE, width,
height);
sMcFunc.pfSampleAveraging[w] (uDstTest[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0],
MC_BUFF_SRC_STRIDE, height);
sMcFunc.pfSampleAveraging (uDstTest[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0],
MC_BUFF_SRC_STRIDE, width, height);
for (int32_t j = 0; j < height; j++) {
for (int32_t i = 0; i < width; i++) {
ASSERT_EQ (uDstAnchor[j][i], uDstTest[j][i]);