diff --git a/codec/decoder/core/src/mc.cpp b/codec/decoder/core/src/mc.cpp index bcc166be..326002f4 100644 --- a/codec/decoder/core/src/mc.cpp +++ b/codec/decoder/core/src/mc.cpp @@ -133,10 +133,10 @@ static inline void McCopyWidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, u //--------------------Luma sample MC------------------// -static inline int32_t HorFilterInput16bit_c (int16_t* pSrc) { - int32_t iPix05 = pSrc[-2] + pSrc[3]; - int32_t iPix14 = pSrc[-1] + pSrc[2]; - int32_t iPix23 = pSrc[ 0] + pSrc[1]; +static inline int32_t HorFilterInput16bit_c (const int16_t* pSrc) { + int32_t iPix05 = pSrc[0] + pSrc[5]; + int32_t iPix14 = pSrc[1] + pSrc[4]; + int32_t iPix23 = pSrc[2] + pSrc[3]; return (iPix05 - (iPix14 * 5) + (iPix23 * 20)); } @@ -213,7 +213,7 @@ static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_ iTmp[j] = FilterInput8bitWithStride_c (pSrc - 2 + j, iSrcStride); } for (k = 0; k < iWidth; k++) { - pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[2 + k]) + 512) >> 10); + pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[k]) + 512) >> 10); } pSrc += iSrcStride; pDst += iDstStride; diff --git a/codec/encoder/core/inc/mc.h b/codec/encoder/core/inc/mc.h index fccff2ce..c8aa68ab 100644 --- a/codec/encoder/core/inc/mc.h +++ b/codec/encoder/core/inc/mc.h @@ -45,7 +45,7 @@ //x y means dx(mv[0] & 3) and dy(mv[1] & 3) namespace WelsEnc { -void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag); +void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag); } #endif//WELS_MC_H__ diff --git a/codec/encoder/core/inc/wels_func_ptr_def.h b/codec/encoder/core/inc/wels_func_ptr_def.h index c7cef638..fb5394b0 100644 --- a/codec/encoder/core/inc/wels_func_ptr_def.h +++ b/codec/encoder/core/inc/wels_func_ptr_def.h @@ -75,7 +75,7 @@ typedef int32_t (*PQuantizationHadamardFunc) (int16_t* pRes, const int16_t kiFF, int16_t* pBlock); typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY mv, int32_t iWidth, int32_t iHeight); + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight); typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); diff --git a/codec/encoder/core/src/encoder.cpp b/codec/encoder/core/src/encoder.cpp index 5d7982bb..db71bb82 100644 --- a/codec/encoder/core/src/encoder.cpp +++ b/codec/encoder/core/src/encoder.cpp @@ -209,7 +209,7 @@ int32_t InitFunctionPointers (sWelsEncCtx* pEncCtx, SWelsSvcCodingParam* pParam, /* Motion compensation */ /*init pixel average function*/ /*get one column or row pixel when refinement*/ - WelsInitMcFuncs (pFuncList, uiCpuFlag); + WelsInitMcFuncs (&pFuncList->sMcFuncs, uiCpuFlag); InitCoeffFunc (pFuncList,uiCpuFlag,pParam->iEntropyCodingModeFlag); WelsInitEncodingFuncs (pFuncList, uiCpuFlag); diff --git a/codec/encoder/core/src/mc.cpp b/codec/encoder/core/src/mc.cpp index 36cfd23e..181a30ab 100644 --- a/codec/encoder/core/src/mc.cpp +++ b/codec/encoder/core/src/mc.cpp @@ -125,10 +125,10 @@ static inline int32_t HorFilter_c (const uint8_t* pSrc) { return (iPix05 - ((iPix14 << 2) + iPix14) + (iPix23 << 4) + (iPix23 << 2)); } -static inline int32_t HorFilterInput16bit1_c (int16_t* pSrc) { - int32_t iPix05 = pSrc[-2] + pSrc[3]; - int32_t iPix14 = pSrc[-1] + pSrc[2]; - int32_t iPix23 = pSrc[ 0] + pSrc[1]; +static inline int32_t HorFilterInput16bit1_c (const int16_t* pSrc) { + int32_t iPix05 = pSrc[0] + pSrc[5]; + int32_t iPix14 = pSrc[1] + pSrc[4]; + int32_t iPix23 = pSrc[2] + pSrc[3]; return (iPix05 - ((iPix14 << 2) + iPix14) + (iPix23 << 4) + (iPix23 << 2)); } @@ -203,7 +203,7 @@ static inline void McHorVer22WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStrid pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride); } for (k = 0; k < 16; k++) { - pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10); + pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10); } pSrc += iSrcStride; pDst += iDstStride; @@ -342,7 +342,7 @@ static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_ pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride); } for (k = 0; k < iWidth; k++) { - pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10); + pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10); } pSrc += iSrcStride; pDst += iDstStride; @@ -367,11 +367,11 @@ static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* p } void McChroma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY mv, int32_t iWidth, int32_t iHeight) + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) //pSrc has been added the offset of mv { - const int32_t kiDx = mv.iMvX & 0x07; - const int32_t kiDy = mv.iMvY & 0x07; + const int32_t kiDx = iMvX & 0x07; + const int32_t kiDy = iMvY & 0x07; if (0 == kiDx && 0 == kiDy) { McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); @@ -543,9 +543,9 @@ static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t typedef void (*McChromaWidthEqx) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, const uint8_t* pABCD, int32_t iHeigh); void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY sMv, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = sMv.iMvX & 0x07; - const int32_t kiD8y = sMv.iMvY & 0x07; + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; static const McChromaWidthEqx kpfFuncs[2] = { McChromaWidthEq4_mmx, McChromaWidthEq8_sse2 @@ -559,9 +559,9 @@ void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int3 } void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY sMv, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = sMv.iMvX & 0x07; - const int32_t kiD8y = sMv.iMvY & 0x07; + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; static const McChromaWidthEqx kpfFuncs[2] = { McChromaWidthEq4_mmx, @@ -651,9 +651,9 @@ void EncMcHorVer33_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight); } void EncMcChroma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY sMv, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = sMv.iMvX & 0x07; - const int32_t kiD8y = sMv.iMvY & 0x07; + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; if (0 == kiD8x && 0 == kiD8y) { if (8 == iWidth) McCopyWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); @@ -748,9 +748,9 @@ void EncMcHorVer33_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_ PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight); } void EncMcChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY sMv, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = sMv.iMvX & 0x07; - const int32_t kiD8y = sMv.iMvY & 0x07; + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; if (0 == kiD8x && 0 == kiD8y) { if (8 == iWidth) McCopyWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); @@ -765,9 +765,8 @@ void EncMcChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* } #endif -typedef void (*PixelAvgFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t); -void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) { - static const PixelAvgFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c}; +void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) { + static const PWelsSampleAveragingFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c}; static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x] McCopyWidthEq16_c, McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c, @@ -799,50 +798,50 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) { McHorVer03WidthEq16_AArch64_neon, EncMcHorVer13_AArch64_neon, EncMcHorVer23_AArch64_neon, EncMcHorVer33_AArch64_neon }; #endif - pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20_c; - pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02_c; - pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22_c; - memcpy (pFuncList->sMcFuncs.pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc)); - pFuncList->sMcFuncs.pfChromaMc = McChroma_c; - memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16, sizeof (pWelsMcFuncWidthEq16)); + pMcFuncs->pfLumaHalfpelHor = McHorVer20_c; + pMcFuncs->pfLumaHalfpelVer = McHorVer02_c; + pMcFuncs->pfLumaHalfpelCen = McHorVer22_c; + memcpy (pMcFuncs->pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc)); + pMcFuncs->pfChromaMc = McChroma_c; + memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16, sizeof (pWelsMcFuncWidthEq16)); #if defined (X86_ASM) if (uiCpuFlag & WELS_CPU_SSE2) { - pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20Width9Or17_sse2; - pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02Height9Or17_sse2; - pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2; - pFuncList->sMcFuncs.pfSampleAveraging[0] = PixelAvgWidthEq8_mmx; - pFuncList->sMcFuncs.pfSampleAveraging[1] = PixelAvgWidthEq16_sse2; - pFuncList->sMcFuncs.pfChromaMc = McChroma_sse2; - memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_sse2, sizeof (pWelsMcFuncWidthEq16_sse2)); + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2; + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2; + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2; + pMcFuncs->pfSampleAveraging[0] = PixelAvgWidthEq8_mmx; + pMcFuncs->pfSampleAveraging[1] = PixelAvgWidthEq16_sse2; + pMcFuncs->pfChromaMc = McChroma_sse2; + memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_sse2, sizeof (pWelsMcFuncWidthEq16_sse2)); } if (uiCpuFlag & WELS_CPU_SSSE3) { - pFuncList->sMcFuncs.pfChromaMc = McChroma_ssse3; + pMcFuncs->pfChromaMc = McChroma_ssse3; } #endif //(X86_ASM) #if defined(HAVE_NEON) if (uiCpuFlag & WELS_CPU_NEON) { - memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_neon, sizeof (pWelsMcFuncWidthEq16_neon)); - pFuncList->sMcFuncs.pfChromaMc = EncMcChroma_neon; - pFuncList->sMcFuncs.pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon; - pFuncList->sMcFuncs.pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon; - pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16 - pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16 - pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1 + memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_neon, sizeof (pWelsMcFuncWidthEq16_neon)); + pMcFuncs->pfChromaMc = EncMcChroma_neon; + pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon; + pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon; + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16 + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16 + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1 } #endif #if defined(HAVE_NEON_AARCH64) if (uiCpuFlag & WELS_CPU_NEON) { - memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_AArch64_neon, + memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_AArch64_neon, sizeof (pWelsMcFuncWidthEq16_AArch64_neon)); - pFuncList->sMcFuncs.pfChromaMc = EncMcChroma_AArch64_neon; - pFuncList->sMcFuncs.pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon; - pFuncList->sMcFuncs.pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon; - pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16 - pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16 - pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1 + pMcFuncs->pfChromaMc = EncMcChroma_AArch64_neon; + pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon; + pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon; + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16 + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16 + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1 } #endif } diff --git a/codec/encoder/core/src/svc_base_layer_md.cpp b/codec/encoder/core/src/svc_base_layer_md.cpp index f5f0908b..3e587fef 100644 --- a/codec/encoder/core/src/svc_base_layer_md.cpp +++ b/codec/encoder/core/src/svc_base_layer_md.cpp @@ -1248,8 +1248,8 @@ void WelsMdBackgroundMbEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, } //MC pFunc->sMcFuncs.pfLumaQuarpelMc[0] (pRefLuma, iLineSizeY, pDstLuma, 16, 16); - pFunc->sMcFuncs.pfChromaMc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp, 8, 8); //Cb - pFunc->sMcFuncs.pfChromaMc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp, 8, 8); //Cr + pFunc->sMcFuncs.pfChromaMc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cb + pFunc->sMcFuncs.pfChromaMc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cr pCurMb->uiCbp = 0; pMbCache->bCollocatedPredFlag = true; @@ -1350,12 +1350,12 @@ bool WelsMdPSkipEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCac const int32_t iStrideUV = (sQpelMvp.iMvY >> 1) * iLineSizeUV + (sQpelMvp.iMvX >> 1); pRefCb += iStrideUV; - pFunc->sMcFuncs.pfChromaMc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp, 8, 8); //Cb + pFunc->sMcFuncs.pfChromaMc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cb iSadCostChroma = pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] (pMbCache->SPicData.pEncMb[1], pCurLayer->iEncStride[1], pDstCb, 8); pRefCr += iStrideUV; - pFunc->sMcFuncs.pfChromaMc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp, 8, 8); //Cr + pFunc->sMcFuncs.pfChromaMc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cr iSadCostChroma += pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] (pMbCache->SPicData.pEncMb[2], pCurLayer->iEncStride[2], pDstCr, 8); @@ -1463,8 +1463,8 @@ void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurM iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3); pTmpRefCb = pRefCb + iMvStride; pTmpRefCr = pRefCr + iMvStride; - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb, iLineSizeRefUV, pDstCb, 8, *pMv, 8, 8); //Cb - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pDstCr, 8, *pMv, 8, 8); //Cr + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb, iLineSizeRefUV, pDstCb, 8, pMv->iMvX, pMv->iMvY, 8, 8); //Cb + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pDstCr, 8, pMv->iMvX, pMv->iMvY, 8, 8); //Cr pWelsMd->iCostSkipMb = pEncCtx->pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0], pCurDqLayer->iEncStride[0], pDstLuma, 16); @@ -1498,8 +1498,8 @@ void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurM pTmpRefCr = pRefCr + iRefBlk4Stride + iMvStride; pTmpDstCb = pDstCb + iDstBlk4Stride; pTmpDstCr = pDstCr + iDstBlk4Stride; - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb, iLineSizeRefUV, pTmpDstCb, 8, *pMv, 8, 4); //Cb - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pTmpDstCr, 8, *pMv, 8, 4); //Cr + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, 8, 4); //Cb + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, 8, 4); //Cr } break; @@ -1526,8 +1526,8 @@ void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurM pTmpRefCr = pRefCr + iRefBlk4Stride + iMvStride; pTmpDstCb = pDstCb + iRefBlk4Stride; pTmpDstCr = pDstCr + iRefBlk4Stride; - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb, iLineSizeRefUV, pTmpDstCb, 8, *pMv, 4, 8); //Cb - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pTmpDstCr, 8, *pMv, 4, 8); //Cr + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, 4, 8); //Cb + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, 4, 8); //Cr } break; @@ -1560,8 +1560,8 @@ void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurM pTmpDstCb = pDstCb + iDstBlk4Stride; pTmpRefCr = pRefCr + iRefBlk4Stride; pTmpDstCr = pDstCr + iDstBlk4Stride; - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, *pMv, 4, 4); //Cb - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, *pMv, 4, 4); //Cr + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, 4, 4); //Cb + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, 4, 4); //Cr } break; diff --git a/codec/encoder/core/src/svc_mode_decision.cpp b/codec/encoder/core/src/svc_mode_decision.cpp index a0615396..e6358474 100644 --- a/codec/encoder/core/src/svc_mode_decision.cpp +++ b/codec/encoder/core/src/svc_mode_decision.cpp @@ -415,8 +415,8 @@ void SvcMdSCDMbEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCach } //MC pFunc->sMcFuncs.pfLumaQuarpelMc[0] (pRefLuma + iOffsetY, iLineSizeY, pDstLuma, 16, 16); - pFunc->sMcFuncs.pfChromaMc (pRefCb + iOffsetUV, iLineSizeUV, pDstCb, 8, sMvp, 8, 8); - pFunc->sMcFuncs.pfChromaMc (pRefCr + iOffsetUV, iLineSizeUV, pDstCr, 8, sMvp, 8, 8); + pFunc->sMcFuncs.pfChromaMc (pRefCb + iOffsetUV, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); + pFunc->sMcFuncs.pfChromaMc (pRefCr + iOffsetUV, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); pCurMb->uiCbp = 0; pWelsMd->iCostLuma = 0; diff --git a/test/build/win32/codec_ut/codec_unittest.vcproj b/test/build/win32/codec_ut/codec_unittest.vcproj index 37c8f29b..16199ffa 100644 --- a/test/build/win32/codec_ut/codec_unittest.vcproj +++ b/test/build/win32/codec_ut/codec_unittest.vcproj @@ -414,6 +414,10 @@ RelativePath="..\..\..\encoder\EncUT_MemoryZero.cpp" > + + diff --git a/test/decoder/DecUT_MotionCompensation.cpp b/test/decoder/DecUT_MotionCompensation.cpp index 46039e3d..b517c77c 100644 --- a/test/decoder/DecUT_MotionCompensation.cpp +++ b/test/decoder/DecUT_MotionCompensation.cpp @@ -1,365 +1,39 @@ #include #include "codec_def.h" #include "mc.h" -#include "mem_align.h" -#include "cpu_core.h" #include "cpu.h" using namespace WelsDec; -#define MC_BUFF_SRC_STRIDE 32 -#define MC_BUFF_DST_STRIDE 32 -#define MC_BUFF_HEIGHT 30 +#define LUMA_FUNC(funcs, src, srcstride, dst, dststride, mvx, mvy, width, height) \ + sMcFunc.pMcLumaFunc (src, srcstride, dst, dststride, mvx, mvy, width, height) -/**********************MC Unit Test Anchor Code Begin******************************/ -bool bQpelNeeded[4][4] = { - { false, true, false, true }, - { true, true, true, true }, - { false, true, false, true }, - { true, true, true, true } -}; -int32_t iHpelRef0Array[4][4] = { - { 0, 1, 1, 1 }, - { 0, 1, 1, 1 }, - { 2, 3, 3, 3 }, - { 0, 1, 1, 1 } -}; -int32_t iHpelRef1Array[4][4] = { - { 0, 0, 0, 0 }, - { 2, 2, 3, 2 }, - { 2, 2, 3, 2 }, - { 2, 2, 3, 2 } -}; -#define FILTER6TAP(pPixBuff, x, iStride) ((pPixBuff)[x-2*iStride] + (pPixBuff)[x+3*iStride] - 5*((pPixBuff)[x-iStride] + (pPixBuff)[x+2*iStride]) + 20*((pPixBuff)[x] + (pPixBuff)[x+iStride])) -static inline uint8_t Clip255 (int32_t x) { - return ((x & ~255) ? (-x) >> 31 & 255 : x); -} +#define CHROMA_FUNC sMcFunc.pMcChromaFunc -void MCCopyAnchor (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, - int32_t iHeight) { - for (int32_t y = 0; y < iHeight; y++) { - memcpy (pDst, pSrc, iWidth * sizeof (uint8_t)); - pSrc += iSrcStride; - pDst += iDstStride; - } -} +#include "mc_test_common.h" -void MCHalfPelFilterAnchor (uint8_t* pDstH, uint8_t* pDstV, uint8_t* pDstHV, uint8_t* pSrc, - int32_t iStride, int32_t iWidth, int32_t iHeight, int16_t* pBuf) { - for (int32_t y = 0; y < iHeight; y++) { - for (int32_t x = 0; x < iWidth; x++) - pDstH[x] = Clip255 ((FILTER6TAP (pSrc, x, 1) + 16) >> 5); - for (int32_t x = -2; x < iWidth + 3; x++) { - int32_t v = FILTER6TAP (pSrc, x, iStride); - pDstV[x] = Clip255 ((v + 16) >> 5); - pBuf[x + 2] = v; - } - for (int32_t x = 0; x < iWidth; x++) - pDstHV[x] = Clip255 ((FILTER6TAP (pBuf + 2, x, 1) + 512) >> 10); - pDstH += iStride; - pDstV += iStride; - pDstHV += iStride; - pSrc += iStride; - } -} +DEF_MCCOPYTEST (, 2, 2, 1) +DEF_MCCOPYTEST (, 2, 4, 1) +DEF_MCCOPYTEST (, 4, 2, 0) +DEF_MCCOPYTEST (, 4, 4, 0) +DEF_MCCOPYTEST (, 4, 8, 0) +DEF_MCCOPYTEST (, 8, 4, 0) +DEF_MCCOPYTEST (, 8, 8, 0) +DEF_MCCOPYTEST (, 16, 8, 0) +DEF_MCCOPYTEST (, 8, 16, 0) +DEF_MCCOPYTEST (, 16, 16, 0) -void PixelAvgAnchor (uint8_t* pDst, int32_t iDstStride, - uint8_t* pSrc1, int32_t iSrc1Stride, - uint8_t* pSrc2, int32_t iSrc2Stride, int32_t iWidth, int32_t iHeight) { - for (int32_t y = 0; y < iHeight; y++) { - for (int32_t x = 0; x < iWidth; x++) - pDst[x] = (pSrc1[x] + pSrc2[x] + 1) >> 1; - pDst += iDstStride; - pSrc1 += iSrc1Stride; - pSrc2 += iSrc2Stride; - } -} +DEF_LUMA_MCTEST (, 4, 4) +DEF_LUMA_MCTEST (, 4, 8) +DEF_LUMA_MCTEST (, 8, 4) +DEF_LUMA_MCTEST (, 8, 8) +DEF_LUMA_MCTEST (, 16, 8) +DEF_LUMA_MCTEST (, 8, 16) +DEF_LUMA_MCTEST (, 16, 16) -void MCLumaAnchor (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrc[4], int32_t iSrcStride, - int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { - int32_t iMvXIdx = iMvX & 3; - int32_t iMvYIdx = iMvY & 3; - int32_t iOffset = (iMvY >> 2) * iSrcStride + (iMvX >> 2); - uint8_t* pSrc1 = pSrc[iHpelRef0Array[iMvYIdx][iMvXIdx]] + iOffset + ((iMvYIdx) == 3) * iSrcStride; - - if (bQpelNeeded[iMvYIdx][iMvXIdx]) { - uint8_t* pSrc2 = pSrc[iHpelRef1Array[iMvYIdx][iMvXIdx]] + iOffset + ((iMvXIdx) == 3); - PixelAvgAnchor (pDst, iDstStride, pSrc1, iSrcStride, pSrc2, iSrcStride, iWidth, iHeight); - } else { - MCCopyAnchor (pSrc1, iSrcStride, pDst, iDstStride, iWidth, iHeight); - } -} - -void MCChromaAnchor (uint8_t* pDstU, uint8_t* pDstV, int32_t iDstStride, uint8_t* pSrc, int32_t iSrcStride, - int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { - uint8_t* pSrcTmp; - pSrc += (iMvY >> 3) * iSrcStride + (iMvX >> 3) * 2; - pSrcTmp = &pSrc[iSrcStride]; - - int32_t iMvXIdx = iMvX & 0x07; - int32_t iMvYIdx = iMvY & 0x07; - int32_t iBiPara0 = (8 - iMvXIdx) * (8 - iMvYIdx); - int32_t iBiPara1 = iMvXIdx * (8 - iMvYIdx); - int32_t iBiPara2 = (8 - iMvXIdx) * iMvYIdx; - int32_t iBiPara3 = iMvXIdx * iMvYIdx; - for (int32_t y = 0; y < iHeight; y++) { - for (int32_t x = 0; x < iWidth; x++) { - pDstU[x] = (iBiPara0 * pSrc[2 * x] + iBiPara1 * pSrc[2 * x + 2] + - iBiPara2 * pSrcTmp[2 * x] + iBiPara3 * pSrcTmp[2 * x + 2] + 32) >> 6; - pDstV[x] = (iBiPara0 * pSrc[2 * x + 1] + iBiPara1 * pSrc[2 * x + 3] + - iBiPara2 * pSrcTmp[2 * x + 1] + iBiPara3 * pSrcTmp[2 * x + 3] + 32) >> 6; - } - pSrc = pSrcTmp; - pSrcTmp += iSrcStride; - pDstU += iDstStride; - pDstV += iDstStride; - } -} - -/**********************MC Unit Test OPENH264 Code Begin******************************/ -#define DEF_MCCOPYTEST(iH,iW, forceC) \ -TEST(McCopy_c,iW##x##iH) \ -{ \ - SMcFunc sMcFunc; \ - int32_t iCpuCores = 1; \ - uint32_t uiCpuFlag;\ - for(int32_t k =0; k<2; k++)\ - {\ - if(k==0||forceC!=0)\ - {\ - uiCpuFlag = 0;\ - }else \ - {\ - uiCpuFlag = WelsCPUFeatureDetect (&iCpuCores); \ - }\ - InitMcFunc(&sMcFunc, uiCpuFlag); \ - uint8_t uSrcAnchor[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; \ - uint8_t uSrcTest[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; \ - ENFORCE_STACK_ALIGN_2D(uint8_t, uDstAnchor, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); \ - ENFORCE_STACK_ALIGN_2D(uint8_t, uDstTest, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); \ - for(int32_t j=0;j +#include "codec_def.h" +#include "mc.h" +#include "cpu.h" +using namespace WelsEnc; + +static void McLumaFunc (SMcFunc* pFuncs, const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03); + ASSERT_EQ (iWidth, 16); + pFuncs->pfLumaQuarpelMc[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} + +#define InitMcFunc WelsInitMcFuncs + +#define LUMA_FUNC(funcs, src, srcstride, dst, dststride, mvx, mvy, width, height) \ + McLumaFunc (funcs, src, srcstride, dst, dststride, mvx, mvy, width, height) + +#define CHROMA_FUNC sMcFunc.pfChromaMc + +#include "mc_test_common.h" + +DEF_MCCOPYTEST (Enc, 16, 8, 0) +DEF_MCCOPYTEST (Enc, 16, 16, 0) + +DEF_LUMA_MCTEST (Enc, 16, 8) +DEF_LUMA_MCTEST (Enc, 16, 16) + +DEF_CHROMA_MCTEST (Enc, 4, 2) +DEF_CHROMA_MCTEST (Enc, 4, 4) +DEF_CHROMA_MCTEST (Enc, 4, 8) +DEF_CHROMA_MCTEST (Enc, 8, 4) +DEF_CHROMA_MCTEST (Enc, 8, 8) + +TEST (EncMcAvg, PixelAvg) { + SMcFunc sMcFunc; + for (int32_t k = 0; k < 2; k++) { + for (int32_t w = 0; w < 2; w++) { + int32_t width = 8 << w; + int32_t height = 16; + uint32_t uiCpuFlag = k == 0 ? 0 : WelsCPUFeatureDetect (NULL); + WelsInitMcFuncs (&sMcFunc, uiCpuFlag); + uint8_t uSrc1[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; + uint8_t uSrc2[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; + ENFORCE_STACK_ALIGN_2D (uint8_t, uDstAnchor, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, uDstTest, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); + for (int32_t j = 0; j < MC_BUFF_HEIGHT; j++) { + for (int32_t i = 0; i < MC_BUFF_SRC_STRIDE; i++) { + uSrc1[j][i] = rand() % 256; + uSrc2[j][i] = rand() % 256; + } + } + PixelAvgAnchor (uDstAnchor[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0], MC_BUFF_SRC_STRIDE, width, + height); + sMcFunc.pfSampleAveraging[w] (uDstTest[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0], + MC_BUFF_SRC_STRIDE, height); + for (int32_t j = 0; j < height; j++) { + for (int32_t i = 0; i < width; i++) { + ASSERT_EQ (uDstAnchor[j][i], uDstTest[j][i]); + } + } + } + } +} + +TEST (EncMcHalfpel, LumaHalfpel) { + SMcFunc sMcFunc; + for (int32_t k = 0; k < 2; k++) { + for (int32_t w = 0; w < 2; w++) { + int32_t width = 8 << w; + int32_t height = 16; + uint8_t uAnchor[4][MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; + uint8_t uSrcTest[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; + ENFORCE_STACK_ALIGN_2D (uint8_t, uDstTest, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); + uint8_t* uAnchors[4]; + int16_t pBuf[MC_BUFF_DST_STRIDE]; + uAnchors[0] = &uAnchor[0][4][4]; + uAnchors[1] = &uAnchor[1][4][4]; + uAnchors[2] = &uAnchor[2][4][4]; + uAnchors[3] = &uAnchor[3][4][4]; + + memset (uAnchor, 0, 4 * sizeof (uint8_t)*MC_BUFF_HEIGHT * MC_BUFF_DST_STRIDE); + memset (uDstTest, 0, sizeof (uint8_t)*MC_BUFF_HEIGHT * MC_BUFF_DST_STRIDE); + for (int32_t j = 0; j < MC_BUFF_HEIGHT; j++) { + for (int32_t i = 0; i < MC_BUFF_SRC_STRIDE; i++) { + uAnchor[0][j][i] = uSrcTest[j][i] = rand() % 256; + } + } + + uint32_t uiCpuFlag = k == 0 ? 0 : WelsCPUFeatureDetect (NULL); + WelsInitMcFuncs (&sMcFunc, uiCpuFlag); + + MCHalfPelFilterAnchor (uAnchors[1], uAnchors[2], uAnchors[3], uAnchors[0], MC_BUFF_SRC_STRIDE, width, height, pBuf + 4); + sMcFunc.pfLumaHalfpelHor (&uSrcTest[4][4], MC_BUFF_SRC_STRIDE, uDstTest[0], MC_BUFF_DST_STRIDE, width + 1, height); + for (int32_t j = 0; j < height; j++) { + for (int32_t i = 0; i < width; i++) { + ASSERT_EQ (uAnchor[1][4 + j][4 + i], uDstTest[j][i]); + } + } + sMcFunc.pfLumaHalfpelVer (&uSrcTest[4][4], MC_BUFF_SRC_STRIDE, uDstTest[0], MC_BUFF_DST_STRIDE, width, height + 1); + for (int32_t j = 0; j < height; j++) { + for (int32_t i = 0; i < width; i++) { + ASSERT_EQ (uAnchor[2][4 + j][4 + i], uDstTest[j][i]); + } + } + sMcFunc.pfLumaHalfpelCen (&uSrcTest[4][4], MC_BUFF_SRC_STRIDE, uDstTest[0], MC_BUFF_DST_STRIDE, width + 1, height + 1); + for (int32_t j = 0; j < height; j++) { + for (int32_t i = 0; i < width; i++) { + ASSERT_EQ (uAnchor[3][4 + j][4 + i], uDstTest[j][i]); + } + } + } + } +} diff --git a/test/encoder/targets.mk b/test/encoder/targets.mk index 4f8c9cb4..87279552 100644 --- a/test/encoder/targets.mk +++ b/test/encoder/targets.mk @@ -10,6 +10,7 @@ ENCODER_UNITTEST_CPP_SRCS=\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_MBCopy.cpp\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryAlloc.cpp\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryZero.cpp\ + $(ENCODER_UNITTEST_SRCDIR)/EncUT_MotionCompensation.cpp\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_MotionEstimate.cpp\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_ParameterSetStrategy.cpp\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_Reconstruct.cpp\ diff --git a/test/mc_test_common.h b/test/mc_test_common.h new file mode 100644 index 00000000..12c85ff8 --- /dev/null +++ b/test/mc_test_common.h @@ -0,0 +1,249 @@ +#define MC_BUFF_SRC_STRIDE 32 +#define MC_BUFF_DST_STRIDE 32 +#define MC_BUFF_HEIGHT 30 + +/**********************MC Unit Test Anchor Code Begin******************************/ +static bool bQpelNeeded[4][4] = { + { false, true, false, true }, + { true, true, true, true }, + { false, true, false, true }, + { true, true, true, true } +}; +static int32_t iHpelRef0Array[4][4] = { + { 0, 1, 1, 1 }, + { 0, 1, 1, 1 }, + { 2, 3, 3, 3 }, + { 0, 1, 1, 1 } +}; +static int32_t iHpelRef1Array[4][4] = { + { 0, 0, 0, 0 }, + { 2, 2, 3, 2 }, + { 2, 2, 3, 2 }, + { 2, 2, 3, 2 } +}; +#define FILTER6TAP(pPixBuff, x, iStride) ((pPixBuff)[x-2*iStride] + (pPixBuff)[x+3*iStride] - 5*((pPixBuff)[x-iStride] + (pPixBuff)[x+2*iStride]) + 20*((pPixBuff)[x] + (pPixBuff)[x+iStride])) +static inline uint8_t Clip255 (int32_t x) { + return ((x & ~255) ? (-x) >> 31 & 255 : x); +} + +static void MCCopyAnchor (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, + int32_t iHeight) { + for (int32_t y = 0; y < iHeight; y++) { + memcpy (pDst, pSrc, iWidth * sizeof (uint8_t)); + pSrc += iSrcStride; + pDst += iDstStride; + } +} + +static void MCHalfPelFilterAnchor (uint8_t* pDstH, uint8_t* pDstV, uint8_t* pDstHV, uint8_t* pSrc, + int32_t iStride, int32_t iWidth, int32_t iHeight, int16_t* pBuf) { + for (int32_t y = 0; y < iHeight; y++) { + for (int32_t x = 0; x < iWidth; x++) + pDstH[x] = Clip255 ((FILTER6TAP (pSrc, x, 1) + 16) >> 5); + for (int32_t x = -2; x < iWidth + 3; x++) { + int32_t v = FILTER6TAP (pSrc, x, iStride); + pDstV[x] = Clip255 ((v + 16) >> 5); + pBuf[x + 2] = v; + } + for (int32_t x = 0; x < iWidth; x++) + pDstHV[x] = Clip255 ((FILTER6TAP (pBuf + 2, x, 1) + 512) >> 10); + pDstH += iStride; + pDstV += iStride; + pDstHV += iStride; + pSrc += iStride; + } +} + +static void PixelAvgAnchor (uint8_t* pDst, int32_t iDstStride, + uint8_t* pSrc1, int32_t iSrc1Stride, + uint8_t* pSrc2, int32_t iSrc2Stride, int32_t iWidth, int32_t iHeight) { + for (int32_t y = 0; y < iHeight; y++) { + for (int32_t x = 0; x < iWidth; x++) + pDst[x] = (pSrc1[x] + pSrc2[x] + 1) >> 1; + pDst += iDstStride; + pSrc1 += iSrc1Stride; + pSrc2 += iSrc2Stride; + } +} + +static void MCLumaAnchor (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrc[4], int32_t iSrcStride, + int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { + int32_t iMvXIdx = iMvX & 3; + int32_t iMvYIdx = iMvY & 3; + int32_t iOffset = (iMvY >> 2) * iSrcStride + (iMvX >> 2); + uint8_t* pSrc1 = pSrc[iHpelRef0Array[iMvYIdx][iMvXIdx]] + iOffset + ((iMvYIdx) == 3) * iSrcStride; + + if (bQpelNeeded[iMvYIdx][iMvXIdx]) { + uint8_t* pSrc2 = pSrc[iHpelRef1Array[iMvYIdx][iMvXIdx]] + iOffset + ((iMvXIdx) == 3); + PixelAvgAnchor (pDst, iDstStride, pSrc1, iSrcStride, pSrc2, iSrcStride, iWidth, iHeight); + } else { + MCCopyAnchor (pSrc1, iSrcStride, pDst, iDstStride, iWidth, iHeight); + } +} + +static void MCChromaAnchor (uint8_t* pDstU, uint8_t* pDstV, int32_t iDstStride, uint8_t* pSrc, int32_t iSrcStride, + int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { + uint8_t* pSrcTmp; + pSrc += (iMvY >> 3) * iSrcStride + (iMvX >> 3) * 2; + pSrcTmp = &pSrc[iSrcStride]; + + int32_t iMvXIdx = iMvX & 0x07; + int32_t iMvYIdx = iMvY & 0x07; + int32_t iBiPara0 = (8 - iMvXIdx) * (8 - iMvYIdx); + int32_t iBiPara1 = iMvXIdx * (8 - iMvYIdx); + int32_t iBiPara2 = (8 - iMvXIdx) * iMvYIdx; + int32_t iBiPara3 = iMvXIdx * iMvYIdx; + for (int32_t y = 0; y < iHeight; y++) { + for (int32_t x = 0; x < iWidth; x++) { + pDstU[x] = (iBiPara0 * pSrc[2 * x] + iBiPara1 * pSrc[2 * x + 2] + + iBiPara2 * pSrcTmp[2 * x] + iBiPara3 * pSrcTmp[2 * x + 2] + 32) >> 6; + pDstV[x] = (iBiPara0 * pSrc[2 * x + 1] + iBiPara1 * pSrc[2 * x + 3] + + iBiPara2 * pSrcTmp[2 * x + 1] + iBiPara3 * pSrcTmp[2 * x + 3] + 32) >> 6; + } + pSrc = pSrcTmp; + pSrcTmp += iSrcStride; + pDstU += iDstStride; + pDstV += iDstStride; + } +} + +/**********************MC Unit Test OPENH264 Code Begin******************************/ +#define DEF_MCCOPYTEST(pfx, iW,iH, forceC) \ +TEST(pfx##McCopy_c,iW##x##iH) \ +{ \ + SMcFunc sMcFunc; \ + int32_t iCpuCores = 1; \ + uint32_t uiCpuFlag;\ + for(int32_t k =0; k<2; k++)\ + {\ + if(k==0||forceC!=0)\ + {\ + uiCpuFlag = 0;\ + }else \ + {\ + uiCpuFlag = WelsCPUFeatureDetect (&iCpuCores); \ + }\ + InitMcFunc(&sMcFunc, uiCpuFlag); \ + uint8_t uSrcAnchor[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; \ + uint8_t uSrcTest[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; \ + ENFORCE_STACK_ALIGN_2D(uint8_t, uDstAnchor, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); \ + ENFORCE_STACK_ALIGN_2D(uint8_t, uDstTest, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); \ + for(int32_t j=0;j