From 7d9fc352350a2342cb7627b9f8d6947f55d4874f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 26 Jan 2015 11:17:34 +0200 Subject: [PATCH 01/13] Unify the encoder MC init function with the decoder This simplifies the code a little, by passing a pointer to the sub-struct SMcFunc instead of to the full SWelsFuncPtrList, which isn't necessary. --- codec/encoder/core/inc/mc.h | 2 +- codec/encoder/core/src/encoder.cpp | 2 +- codec/encoder/core/src/mc.cpp | 58 +++++++++++++++--------------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/codec/encoder/core/inc/mc.h b/codec/encoder/core/inc/mc.h index fccff2ce..c8aa68ab 100644 --- a/codec/encoder/core/inc/mc.h +++ b/codec/encoder/core/inc/mc.h @@ -45,7 +45,7 @@ //x y means dx(mv[0] & 3) and dy(mv[1] & 3) namespace WelsEnc { -void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag); +void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag); } #endif//WELS_MC_H__ diff --git a/codec/encoder/core/src/encoder.cpp b/codec/encoder/core/src/encoder.cpp index 5d7982bb..db71bb82 100644 --- a/codec/encoder/core/src/encoder.cpp +++ b/codec/encoder/core/src/encoder.cpp @@ -209,7 +209,7 @@ int32_t InitFunctionPointers (sWelsEncCtx* pEncCtx, SWelsSvcCodingParam* pParam, /* Motion compensation */ /*init pixel average function*/ /*get one column or row pixel when refinement*/ - WelsInitMcFuncs (pFuncList, uiCpuFlag); + WelsInitMcFuncs (&pFuncList->sMcFuncs, uiCpuFlag); InitCoeffFunc (pFuncList,uiCpuFlag,pParam->iEntropyCodingModeFlag); WelsInitEncodingFuncs (pFuncList, uiCpuFlag); diff --git a/codec/encoder/core/src/mc.cpp b/codec/encoder/core/src/mc.cpp index 36cfd23e..e14873b1 100644 --- a/codec/encoder/core/src/mc.cpp +++ b/codec/encoder/core/src/mc.cpp @@ -766,7 +766,7 @@ void EncMcChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* #endif typedef void (*PixelAvgFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t); -void 
WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) { +void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) { static const PixelAvgFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c}; static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x] @@ -799,50 +799,50 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) { McHorVer03WidthEq16_AArch64_neon, EncMcHorVer13_AArch64_neon, EncMcHorVer23_AArch64_neon, EncMcHorVer33_AArch64_neon }; #endif - pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20_c; - pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02_c; - pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22_c; - memcpy (pFuncList->sMcFuncs.pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc)); - pFuncList->sMcFuncs.pfChromaMc = McChroma_c; - memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16, sizeof (pWelsMcFuncWidthEq16)); + pMcFuncs->pfLumaHalfpelHor = McHorVer20_c; + pMcFuncs->pfLumaHalfpelVer = McHorVer02_c; + pMcFuncs->pfLumaHalfpelCen = McHorVer22_c; + memcpy (pMcFuncs->pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc)); + pMcFuncs->pfChromaMc = McChroma_c; + memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16, sizeof (pWelsMcFuncWidthEq16)); #if defined (X86_ASM) if (uiCpuFlag & WELS_CPU_SSE2) { - pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20Width9Or17_sse2; - pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02Height9Or17_sse2; - pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2; - pFuncList->sMcFuncs.pfSampleAveraging[0] = PixelAvgWidthEq8_mmx; - pFuncList->sMcFuncs.pfSampleAveraging[1] = PixelAvgWidthEq16_sse2; - pFuncList->sMcFuncs.pfChromaMc = McChroma_sse2; - memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_sse2, sizeof (pWelsMcFuncWidthEq16_sse2)); + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2; + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2; + pMcFuncs->pfLumaHalfpelCen = 
McHorVer22Width9Or17Height9Or17_sse2; + pMcFuncs->pfSampleAveraging[0] = PixelAvgWidthEq8_mmx; + pMcFuncs->pfSampleAveraging[1] = PixelAvgWidthEq16_sse2; + pMcFuncs->pfChromaMc = McChroma_sse2; + memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_sse2, sizeof (pWelsMcFuncWidthEq16_sse2)); } if (uiCpuFlag & WELS_CPU_SSSE3) { - pFuncList->sMcFuncs.pfChromaMc = McChroma_ssse3; + pMcFuncs->pfChromaMc = McChroma_ssse3; } #endif //(X86_ASM) #if defined(HAVE_NEON) if (uiCpuFlag & WELS_CPU_NEON) { - memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_neon, sizeof (pWelsMcFuncWidthEq16_neon)); - pFuncList->sMcFuncs.pfChromaMc = EncMcChroma_neon; - pFuncList->sMcFuncs.pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon; - pFuncList->sMcFuncs.pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon; - pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16 - pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16 - pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1 + memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_neon, sizeof (pWelsMcFuncWidthEq16_neon)); + pMcFuncs->pfChromaMc = EncMcChroma_neon; + pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon; + pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon; + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16 + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16 + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1 } #endif #if defined(HAVE_NEON_AARCH64) if (uiCpuFlag & WELS_CPU_NEON) { - memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_AArch64_neon, + memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_AArch64_neon, sizeof (pWelsMcFuncWidthEq16_AArch64_neon)); - pFuncList->sMcFuncs.pfChromaMc = EncMcChroma_AArch64_neon; - pFuncList->sMcFuncs.pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon; - 
pFuncList->sMcFuncs.pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon; - pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16 - pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16 - pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1 + pMcFuncs->pfChromaMc = EncMcChroma_AArch64_neon; + pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon; + pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon; + pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16 + pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16 + pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1 } #endif } From 9fc9acfd14450eaa45d7d471ee1c379628d85ea7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 26 Jan 2015 11:20:00 +0200 Subject: [PATCH 02/13] Avoid a duplicate local typedef --- codec/encoder/core/src/mc.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/codec/encoder/core/src/mc.cpp b/codec/encoder/core/src/mc.cpp index e14873b1..1dd4572d 100644 --- a/codec/encoder/core/src/mc.cpp +++ b/codec/encoder/core/src/mc.cpp @@ -765,9 +765,8 @@ void EncMcChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* } #endif -typedef void (*PixelAvgFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t); void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) { - static const PixelAvgFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c}; + static const PWelsSampleAveragingFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c}; static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x] McCopyWidthEq16_c, McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c, From 420a81afe411e3bf471550160c439ef84d4da22b Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 26 Jan 2015 11:58:33 +0200 Subject: [PATCH 03/13] Unify the order of iX and iY in DecUT_MotionCompensation They are in the order iX, iY in the rest of the file. --- test/decoder/DecUT_MotionCompensation.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/decoder/DecUT_MotionCompensation.cpp b/test/decoder/DecUT_MotionCompensation.cpp index 46039e3d..d6347c63 100644 --- a/test/decoder/DecUT_MotionCompensation.cpp +++ b/test/decoder/DecUT_MotionCompensation.cpp @@ -116,7 +116,7 @@ void MCChromaAnchor (uint8_t* pDstU, uint8_t* pDstV, int32_t iDstStride, uint8_t } /**********************MC Unit Test OPENH264 Code Begin******************************/ -#define DEF_MCCOPYTEST(iH,iW, forceC) \ +#define DEF_MCCOPYTEST(iW,iH, forceC) \ TEST(McCopy_c,iW##x##iH) \ { \ SMcFunc sMcFunc; \ @@ -158,8 +158,8 @@ TEST(McCopy_c,iW##x##iH) \ } DEF_MCCOPYTEST (2, 2, 1) -DEF_MCCOPYTEST (2, 4, 0) -DEF_MCCOPYTEST (4, 2, 1) +DEF_MCCOPYTEST (2, 4, 1) +DEF_MCCOPYTEST (4, 2, 0) DEF_MCCOPYTEST (4, 4, 0) DEF_MCCOPYTEST (4, 8, 0) DEF_MCCOPYTEST (8, 4, 0) From 8ecb8b420085d128c1481f550297ea4604ab128a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 26 Jan 2015 12:19:35 +0200 Subject: [PATCH 04/13] Remove unused includes in DecUT_MotionCompensation --- test/decoder/DecUT_MotionCompensation.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/decoder/DecUT_MotionCompensation.cpp b/test/decoder/DecUT_MotionCompensation.cpp index d6347c63..f1badda1 100644 --- a/test/decoder/DecUT_MotionCompensation.cpp +++ b/test/decoder/DecUT_MotionCompensation.cpp @@ -1,8 +1,6 @@ #include #include "codec_def.h" #include "mc.h" -#include "mem_align.h" -#include "cpu_core.h" #include "cpu.h" using namespace WelsDec; From 478af8f00c88aa9e482b95e672344227c162e241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 26 Jan 2015 13:11:18 +0200 Subject: [PATCH 05/13] Unify the 
pfChromaMc function signature with the decoder version Instead of passing a struct with x/y, pass them as two separate parameters. --- codec/encoder/core/inc/wels_func_ptr_def.h | 2 +- codec/encoder/core/src/mc.cpp | 30 ++++++++++---------- codec/encoder/core/src/svc_base_layer_md.cpp | 24 ++++++++-------- codec/encoder/core/src/svc_mode_decision.cpp | 4 +-- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/codec/encoder/core/inc/wels_func_ptr_def.h b/codec/encoder/core/inc/wels_func_ptr_def.h index c7cef638..fb5394b0 100644 --- a/codec/encoder/core/inc/wels_func_ptr_def.h +++ b/codec/encoder/core/inc/wels_func_ptr_def.h @@ -75,7 +75,7 @@ typedef int32_t (*PQuantizationHadamardFunc) (int16_t* pRes, const int16_t kiFF, int16_t* pBlock); typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY mv, int32_t iWidth, int32_t iHeight); + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight); typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight); diff --git a/codec/encoder/core/src/mc.cpp b/codec/encoder/core/src/mc.cpp index 1dd4572d..55b5f945 100644 --- a/codec/encoder/core/src/mc.cpp +++ b/codec/encoder/core/src/mc.cpp @@ -367,11 +367,11 @@ static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* p } void McChroma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY mv, int32_t iWidth, int32_t iHeight) + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) //pSrc has been added the offset of mv { - const int32_t kiDx = mv.iMvX & 0x07; - const int32_t kiDy = mv.iMvY & 0x07; + const int32_t kiDx = iMvX & 0x07; + const int32_t kiDy = iMvY & 0x07; if (0 == kiDx && 0 == kiDy) { McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight); @@ -543,9 +543,9 @@ static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, 
uint8_t typedef void (*McChromaWidthEqx) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, const uint8_t* pABCD, int32_t iHeigh); void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY sMv, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = sMv.iMvX & 0x07; - const int32_t kiD8y = sMv.iMvY & 0x07; + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; static const McChromaWidthEqx kpfFuncs[2] = { McChromaWidthEq4_mmx, McChromaWidthEq8_sse2 @@ -559,9 +559,9 @@ void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int3 } void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY sMv, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = sMv.iMvX & 0x07; - const int32_t kiD8y = sMv.iMvY & 0x07; + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; static const McChromaWidthEqx kpfFuncs[2] = { McChromaWidthEq4_mmx, @@ -651,9 +651,9 @@ void EncMcHorVer33_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight); } void EncMcChroma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY sMv, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = sMv.iMvX & 0x07; - const int32_t kiD8y = sMv.iMvY & 0x07; + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; if (0 == kiD8x && 0 == kiD8y) { if (8 == iWidth) McCopyWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); @@ -748,9 +748,9 @@ void EncMcHorVer33_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_ PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, 
iHeight); } void EncMcChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, - SMVUnitXY sMv, int32_t iWidth, int32_t iHeight) { - const int32_t kiD8x = sMv.iMvX & 0x07; - const int32_t kiD8y = sMv.iMvY & 0x07; + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + const int32_t kiD8x = iMvX & 0x07; + const int32_t kiD8y = iMvY & 0x07; if (0 == kiD8x && 0 == kiD8y) { if (8 == iWidth) McCopyWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight); diff --git a/codec/encoder/core/src/svc_base_layer_md.cpp b/codec/encoder/core/src/svc_base_layer_md.cpp index f5f0908b..3e587fef 100644 --- a/codec/encoder/core/src/svc_base_layer_md.cpp +++ b/codec/encoder/core/src/svc_base_layer_md.cpp @@ -1248,8 +1248,8 @@ void WelsMdBackgroundMbEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, } //MC pFunc->sMcFuncs.pfLumaQuarpelMc[0] (pRefLuma, iLineSizeY, pDstLuma, 16, 16); - pFunc->sMcFuncs.pfChromaMc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp, 8, 8); //Cb - pFunc->sMcFuncs.pfChromaMc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp, 8, 8); //Cr + pFunc->sMcFuncs.pfChromaMc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cb + pFunc->sMcFuncs.pfChromaMc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cr pCurMb->uiCbp = 0; pMbCache->bCollocatedPredFlag = true; @@ -1350,12 +1350,12 @@ bool WelsMdPSkipEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCac const int32_t iStrideUV = (sQpelMvp.iMvY >> 1) * iLineSizeUV + (sQpelMvp.iMvX >> 1); pRefCb += iStrideUV; - pFunc->sMcFuncs.pfChromaMc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp, 8, 8); //Cb + pFunc->sMcFuncs.pfChromaMc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cb iSadCostChroma = pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] (pMbCache->SPicData.pEncMb[1], pCurLayer->iEncStride[1], pDstCb, 8); pRefCr += iStrideUV; - pFunc->sMcFuncs.pfChromaMc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp, 8, 8); //Cr + 
pFunc->sMcFuncs.pfChromaMc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cr iSadCostChroma += pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] (pMbCache->SPicData.pEncMb[2], pCurLayer->iEncStride[2], pDstCr, 8); @@ -1463,8 +1463,8 @@ void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurM iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3); pTmpRefCb = pRefCb + iMvStride; pTmpRefCr = pRefCr + iMvStride; - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb, iLineSizeRefUV, pDstCb, 8, *pMv, 8, 8); //Cb - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pDstCr, 8, *pMv, 8, 8); //Cr + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb, iLineSizeRefUV, pDstCb, 8, pMv->iMvX, pMv->iMvY, 8, 8); //Cb + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pDstCr, 8, pMv->iMvX, pMv->iMvY, 8, 8); //Cr pWelsMd->iCostSkipMb = pEncCtx->pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0], pCurDqLayer->iEncStride[0], pDstLuma, 16); @@ -1498,8 +1498,8 @@ void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurM pTmpRefCr = pRefCr + iRefBlk4Stride + iMvStride; pTmpDstCb = pDstCb + iDstBlk4Stride; pTmpDstCr = pDstCr + iDstBlk4Stride; - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb, iLineSizeRefUV, pTmpDstCb, 8, *pMv, 8, 4); //Cb - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pTmpDstCr, 8, *pMv, 8, 4); //Cr + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, 8, 4); //Cb + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, 8, 4); //Cr } break; @@ -1526,8 +1526,8 @@ void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurM pTmpRefCr = pRefCr + iRefBlk4Stride + iMvStride; pTmpDstCb = pDstCb + iRefBlk4Stride; pTmpDstCr = pDstCr + iRefBlk4Stride; - pEncCtx->pFuncList->sMcFuncs.pfChromaMc 
(pTmpRefCb, iLineSizeRefUV, pTmpDstCb, 8, *pMv, 4, 8); //Cb - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pTmpDstCr, 8, *pMv, 4, 8); //Cr + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, 4, 8); //Cb + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, 4, 8); //Cr } break; @@ -1560,8 +1560,8 @@ void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurM pTmpDstCb = pDstCb + iDstBlk4Stride; pTmpRefCr = pRefCr + iRefBlk4Stride; pTmpDstCr = pDstCr + iDstBlk4Stride; - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, *pMv, 4, 4); //Cb - pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, *pMv, 4, 4); //Cr + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, 4, 4); //Cb + pEncCtx->pFuncList->sMcFuncs.pfChromaMc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, 4, 4); //Cr } break; diff --git a/codec/encoder/core/src/svc_mode_decision.cpp b/codec/encoder/core/src/svc_mode_decision.cpp index a0615396..e6358474 100644 --- a/codec/encoder/core/src/svc_mode_decision.cpp +++ b/codec/encoder/core/src/svc_mode_decision.cpp @@ -415,8 +415,8 @@ void SvcMdSCDMbEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCach } //MC pFunc->sMcFuncs.pfLumaQuarpelMc[0] (pRefLuma + iOffsetY, iLineSizeY, pDstLuma, 16, 16); - pFunc->sMcFuncs.pfChromaMc (pRefCb + iOffsetUV, iLineSizeUV, pDstCb, 8, sMvp, 8, 8); - pFunc->sMcFuncs.pfChromaMc (pRefCr + iOffsetUV, iLineSizeUV, pDstCr, 8, sMvp, 8, 8); + pFunc->sMcFuncs.pfChromaMc (pRefCb + iOffsetUV, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); + pFunc->sMcFuncs.pfChromaMc (pRefCr + iOffsetUV, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); pCurMb->uiCbp = 0; pWelsMd->iCostLuma = 0; From 
cf3e7b5deca3198737f837fd9a1d28702e52f1da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 26 Jan 2015 13:15:02 +0200 Subject: [PATCH 06/13] Make local functions and tables in DecUT_MotionCompensation static --- test/decoder/DecUT_MotionCompensation.cpp | 28 +++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/test/decoder/DecUT_MotionCompensation.cpp b/test/decoder/DecUT_MotionCompensation.cpp index f1badda1..6e20afc4 100644 --- a/test/decoder/DecUT_MotionCompensation.cpp +++ b/test/decoder/DecUT_MotionCompensation.cpp @@ -9,19 +9,19 @@ using namespace WelsDec; #define MC_BUFF_HEIGHT 30 /**********************MC Unit Test Anchor Code Begin******************************/ -bool bQpelNeeded[4][4] = { +static bool bQpelNeeded[4][4] = { { false, true, false, true }, { true, true, true, true }, { false, true, false, true }, { true, true, true, true } }; -int32_t iHpelRef0Array[4][4] = { +static int32_t iHpelRef0Array[4][4] = { { 0, 1, 1, 1 }, { 0, 1, 1, 1 }, { 2, 3, 3, 3 }, { 0, 1, 1, 1 } }; -int32_t iHpelRef1Array[4][4] = { +static int32_t iHpelRef1Array[4][4] = { { 0, 0, 0, 0 }, { 2, 2, 3, 2 }, { 2, 2, 3, 2 }, @@ -32,8 +32,8 @@ static inline uint8_t Clip255 (int32_t x) { return ((x & ~255) ? 
(-x) >> 31 & 255 : x); } -void MCCopyAnchor (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, - int32_t iHeight) { +static void MCCopyAnchor (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, + int32_t iHeight) { for (int32_t y = 0; y < iHeight; y++) { memcpy (pDst, pSrc, iWidth * sizeof (uint8_t)); pSrc += iSrcStride; @@ -41,8 +41,8 @@ void MCCopyAnchor (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDs } } -void MCHalfPelFilterAnchor (uint8_t* pDstH, uint8_t* pDstV, uint8_t* pDstHV, uint8_t* pSrc, - int32_t iStride, int32_t iWidth, int32_t iHeight, int16_t* pBuf) { +static void MCHalfPelFilterAnchor (uint8_t* pDstH, uint8_t* pDstV, uint8_t* pDstHV, uint8_t* pSrc, + int32_t iStride, int32_t iWidth, int32_t iHeight, int16_t* pBuf) { for (int32_t y = 0; y < iHeight; y++) { for (int32_t x = 0; x < iWidth; x++) pDstH[x] = Clip255 ((FILTER6TAP (pSrc, x, 1) + 16) >> 5); @@ -60,9 +60,9 @@ void MCHalfPelFilterAnchor (uint8_t* pDstH, uint8_t* pDstV, uint8_t* pDstHV, uin } } -void PixelAvgAnchor (uint8_t* pDst, int32_t iDstStride, - uint8_t* pSrc1, int32_t iSrc1Stride, - uint8_t* pSrc2, int32_t iSrc2Stride, int32_t iWidth, int32_t iHeight) { +static void PixelAvgAnchor (uint8_t* pDst, int32_t iDstStride, + uint8_t* pSrc1, int32_t iSrc1Stride, + uint8_t* pSrc2, int32_t iSrc2Stride, int32_t iWidth, int32_t iHeight) { for (int32_t y = 0; y < iHeight; y++) { for (int32_t x = 0; x < iWidth; x++) pDst[x] = (pSrc1[x] + pSrc2[x] + 1) >> 1; @@ -72,8 +72,8 @@ void PixelAvgAnchor (uint8_t* pDst, int32_t iDstStride, } } -void MCLumaAnchor (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrc[4], int32_t iSrcStride, - int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { +static void MCLumaAnchor (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrc[4], int32_t iSrcStride, + int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { int32_t iMvXIdx = iMvX & 3; int32_t iMvYIdx = iMvY & 3; int32_t 
iOffset = (iMvY >> 2) * iSrcStride + (iMvX >> 2); @@ -87,8 +87,8 @@ void MCLumaAnchor (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrc[4], int32 } } -void MCChromaAnchor (uint8_t* pDstU, uint8_t* pDstV, int32_t iDstStride, uint8_t* pSrc, int32_t iSrcStride, - int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { +static void MCChromaAnchor (uint8_t* pDstU, uint8_t* pDstV, int32_t iDstStride, uint8_t* pSrc, int32_t iSrcStride, + int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { uint8_t* pSrcTmp; pSrc += (iMvY >> 3) * iSrcStride + (iMvX >> 3) * 2; pSrcTmp = &pSrc[iSrcStride]; From 49af2b592db59d09c58cd880ece236950581369e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 26 Jan 2015 13:25:22 +0200 Subject: [PATCH 07/13] Split DecUT_MotionCompensation to a shareable header This allows adding a version of the same test for the encoder. --- test/decoder/DecUT_MotionCompensation.cpp | 348 +--------------------- test/mc_test_common.h | 330 ++++++++++++++++++++ 2 files changed, 341 insertions(+), 337 deletions(-) create mode 100644 test/mc_test_common.h diff --git a/test/decoder/DecUT_MotionCompensation.cpp b/test/decoder/DecUT_MotionCompensation.cpp index 6e20afc4..4acf6597 100644 --- a/test/decoder/DecUT_MotionCompensation.cpp +++ b/test/decoder/DecUT_MotionCompensation.cpp @@ -4,216 +4,18 @@ #include "cpu.h" using namespace WelsDec; -#define MC_BUFF_SRC_STRIDE 32 -#define MC_BUFF_DST_STRIDE 32 -#define MC_BUFF_HEIGHT 30 - -/**********************MC Unit Test Anchor Code Begin******************************/ -static bool bQpelNeeded[4][4] = { - { false, true, false, true }, - { true, true, true, true }, - { false, true, false, true }, - { true, true, true, true } -}; -static int32_t iHpelRef0Array[4][4] = { - { 0, 1, 1, 1 }, - { 0, 1, 1, 1 }, - { 2, 3, 3, 3 }, - { 0, 1, 1, 1 } -}; -static int32_t iHpelRef1Array[4][4] = { - { 0, 0, 0, 0 }, - { 2, 2, 3, 2 }, - { 2, 2, 3, 2 }, - { 2, 2, 3, 2 } -}; -#define FILTER6TAP(pPixBuff, 
x, iStride) ((pPixBuff)[x-2*iStride] + (pPixBuff)[x+3*iStride] - 5*((pPixBuff)[x-iStride] + (pPixBuff)[x+2*iStride]) + 20*((pPixBuff)[x] + (pPixBuff)[x+iStride])) -static inline uint8_t Clip255 (int32_t x) { - return ((x & ~255) ? (-x) >> 31 & 255 : x); -} - -static void MCCopyAnchor (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, - int32_t iHeight) { - for (int32_t y = 0; y < iHeight; y++) { - memcpy (pDst, pSrc, iWidth * sizeof (uint8_t)); - pSrc += iSrcStride; - pDst += iDstStride; - } -} - -static void MCHalfPelFilterAnchor (uint8_t* pDstH, uint8_t* pDstV, uint8_t* pDstHV, uint8_t* pSrc, - int32_t iStride, int32_t iWidth, int32_t iHeight, int16_t* pBuf) { - for (int32_t y = 0; y < iHeight; y++) { - for (int32_t x = 0; x < iWidth; x++) - pDstH[x] = Clip255 ((FILTER6TAP (pSrc, x, 1) + 16) >> 5); - for (int32_t x = -2; x < iWidth + 3; x++) { - int32_t v = FILTER6TAP (pSrc, x, iStride); - pDstV[x] = Clip255 ((v + 16) >> 5); - pBuf[x + 2] = v; - } - for (int32_t x = 0; x < iWidth; x++) - pDstHV[x] = Clip255 ((FILTER6TAP (pBuf + 2, x, 1) + 512) >> 10); - pDstH += iStride; - pDstV += iStride; - pDstHV += iStride; - pSrc += iStride; - } -} - -static void PixelAvgAnchor (uint8_t* pDst, int32_t iDstStride, - uint8_t* pSrc1, int32_t iSrc1Stride, - uint8_t* pSrc2, int32_t iSrc2Stride, int32_t iWidth, int32_t iHeight) { - for (int32_t y = 0; y < iHeight; y++) { - for (int32_t x = 0; x < iWidth; x++) - pDst[x] = (pSrc1[x] + pSrc2[x] + 1) >> 1; - pDst += iDstStride; - pSrc1 += iSrc1Stride; - pSrc2 += iSrc2Stride; - } -} - -static void MCLumaAnchor (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrc[4], int32_t iSrcStride, - int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { - int32_t iMvXIdx = iMvX & 3; - int32_t iMvYIdx = iMvY & 3; - int32_t iOffset = (iMvY >> 2) * iSrcStride + (iMvX >> 2); - uint8_t* pSrc1 = pSrc[iHpelRef0Array[iMvYIdx][iMvXIdx]] + iOffset + ((iMvYIdx) == 3) * iSrcStride; - - if 
(bQpelNeeded[iMvYIdx][iMvXIdx]) { - uint8_t* pSrc2 = pSrc[iHpelRef1Array[iMvYIdx][iMvXIdx]] + iOffset + ((iMvXIdx) == 3); - PixelAvgAnchor (pDst, iDstStride, pSrc1, iSrcStride, pSrc2, iSrcStride, iWidth, iHeight); - } else { - MCCopyAnchor (pSrc1, iSrcStride, pDst, iDstStride, iWidth, iHeight); - } -} - -static void MCChromaAnchor (uint8_t* pDstU, uint8_t* pDstV, int32_t iDstStride, uint8_t* pSrc, int32_t iSrcStride, - int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { - uint8_t* pSrcTmp; - pSrc += (iMvY >> 3) * iSrcStride + (iMvX >> 3) * 2; - pSrcTmp = &pSrc[iSrcStride]; - - int32_t iMvXIdx = iMvX & 0x07; - int32_t iMvYIdx = iMvY & 0x07; - int32_t iBiPara0 = (8 - iMvXIdx) * (8 - iMvYIdx); - int32_t iBiPara1 = iMvXIdx * (8 - iMvYIdx); - int32_t iBiPara2 = (8 - iMvXIdx) * iMvYIdx; - int32_t iBiPara3 = iMvXIdx * iMvYIdx; - for (int32_t y = 0; y < iHeight; y++) { - for (int32_t x = 0; x < iWidth; x++) { - pDstU[x] = (iBiPara0 * pSrc[2 * x] + iBiPara1 * pSrc[2 * x + 2] + - iBiPara2 * pSrcTmp[2 * x] + iBiPara3 * pSrcTmp[2 * x + 2] + 32) >> 6; - pDstV[x] = (iBiPara0 * pSrc[2 * x + 1] + iBiPara1 * pSrc[2 * x + 3] + - iBiPara2 * pSrcTmp[2 * x + 1] + iBiPara3 * pSrcTmp[2 * x + 3] + 32) >> 6; - } - pSrc = pSrcTmp; - pSrcTmp += iSrcStride; - pDstU += iDstStride; - pDstV += iDstStride; - } -} - -/**********************MC Unit Test OPENH264 Code Begin******************************/ -#define DEF_MCCOPYTEST(iW,iH, forceC) \ -TEST(McCopy_c,iW##x##iH) \ -{ \ - SMcFunc sMcFunc; \ - int32_t iCpuCores = 1; \ - uint32_t uiCpuFlag;\ - for(int32_t k =0; k<2; k++)\ - {\ - if(k==0||forceC!=0)\ - {\ - uiCpuFlag = 0;\ - }else \ - {\ - uiCpuFlag = WelsCPUFeatureDetect (&iCpuCores); \ - }\ - InitMcFunc(&sMcFunc, uiCpuFlag); \ - uint8_t uSrcAnchor[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; \ - uint8_t uSrcTest[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; \ - ENFORCE_STACK_ALIGN_2D(uint8_t, uDstAnchor, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); \ - ENFORCE_STACK_ALIGN_2D(uint8_t, uDstTest, 
MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); \ - for(int32_t j=0;j> 31 & 255 : x); +} + +static void MCCopyAnchor (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, + int32_t iHeight) { + for (int32_t y = 0; y < iHeight; y++) { + memcpy (pDst, pSrc, iWidth * sizeof (uint8_t)); + pSrc += iSrcStride; + pDst += iDstStride; + } +} + +static void MCHalfPelFilterAnchor (uint8_t* pDstH, uint8_t* pDstV, uint8_t* pDstHV, uint8_t* pSrc, + int32_t iStride, int32_t iWidth, int32_t iHeight, int16_t* pBuf) { + for (int32_t y = 0; y < iHeight; y++) { + for (int32_t x = 0; x < iWidth; x++) + pDstH[x] = Clip255 ((FILTER6TAP (pSrc, x, 1) + 16) >> 5); + for (int32_t x = -2; x < iWidth + 3; x++) { + int32_t v = FILTER6TAP (pSrc, x, iStride); + pDstV[x] = Clip255 ((v + 16) >> 5); + pBuf[x + 2] = v; + } + for (int32_t x = 0; x < iWidth; x++) + pDstHV[x] = Clip255 ((FILTER6TAP (pBuf + 2, x, 1) + 512) >> 10); + pDstH += iStride; + pDstV += iStride; + pDstHV += iStride; + pSrc += iStride; + } +} + +static void PixelAvgAnchor (uint8_t* pDst, int32_t iDstStride, + uint8_t* pSrc1, int32_t iSrc1Stride, + uint8_t* pSrc2, int32_t iSrc2Stride, int32_t iWidth, int32_t iHeight) { + for (int32_t y = 0; y < iHeight; y++) { + for (int32_t x = 0; x < iWidth; x++) + pDst[x] = (pSrc1[x] + pSrc2[x] + 1) >> 1; + pDst += iDstStride; + pSrc1 += iSrc1Stride; + pSrc2 += iSrc2Stride; + } +} + +static void MCLumaAnchor (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrc[4], int32_t iSrcStride, + int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { + int32_t iMvXIdx = iMvX & 3; + int32_t iMvYIdx = iMvY & 3; + int32_t iOffset = (iMvY >> 2) * iSrcStride + (iMvX >> 2); + uint8_t* pSrc1 = pSrc[iHpelRef0Array[iMvYIdx][iMvXIdx]] + iOffset + ((iMvYIdx) == 3) * iSrcStride; + + if (bQpelNeeded[iMvYIdx][iMvXIdx]) { + uint8_t* pSrc2 = pSrc[iHpelRef1Array[iMvYIdx][iMvXIdx]] + iOffset + ((iMvXIdx) == 3); + PixelAvgAnchor (pDst, iDstStride, pSrc1, iSrcStride, pSrc2, iSrcStride, iWidth, 
iHeight); + } else { + MCCopyAnchor (pSrc1, iSrcStride, pDst, iDstStride, iWidth, iHeight); + } +} + +static void MCChromaAnchor (uint8_t* pDstU, uint8_t* pDstV, int32_t iDstStride, uint8_t* pSrc, int32_t iSrcStride, + int32_t iMvX, int32_t iMvY, int32_t iWidth, int32_t iHeight) { + uint8_t* pSrcTmp; + pSrc += (iMvY >> 3) * iSrcStride + (iMvX >> 3) * 2; + pSrcTmp = &pSrc[iSrcStride]; + + int32_t iMvXIdx = iMvX & 0x07; + int32_t iMvYIdx = iMvY & 0x07; + int32_t iBiPara0 = (8 - iMvXIdx) * (8 - iMvYIdx); + int32_t iBiPara1 = iMvXIdx * (8 - iMvYIdx); + int32_t iBiPara2 = (8 - iMvXIdx) * iMvYIdx; + int32_t iBiPara3 = iMvXIdx * iMvYIdx; + for (int32_t y = 0; y < iHeight; y++) { + for (int32_t x = 0; x < iWidth; x++) { + pDstU[x] = (iBiPara0 * pSrc[2 * x] + iBiPara1 * pSrc[2 * x + 2] + + iBiPara2 * pSrcTmp[2 * x] + iBiPara3 * pSrcTmp[2 * x + 2] + 32) >> 6; + pDstV[x] = (iBiPara0 * pSrc[2 * x + 1] + iBiPara1 * pSrc[2 * x + 3] + + iBiPara2 * pSrcTmp[2 * x + 1] + iBiPara3 * pSrcTmp[2 * x + 3] + 32) >> 6; + } + pSrc = pSrcTmp; + pSrcTmp += iSrcStride; + pDstU += iDstStride; + pDstV += iDstStride; + } +} + +/**********************MC Unit Test OPENH264 Code Begin******************************/ +#define DEF_MCCOPYTEST(iW,iH, forceC) \ +TEST(McCopy_c,iW##x##iH) \ +{ \ + SMcFunc sMcFunc; \ + int32_t iCpuCores = 1; \ + uint32_t uiCpuFlag;\ + for(int32_t k =0; k<2; k++)\ + {\ + if(k==0||forceC!=0)\ + {\ + uiCpuFlag = 0;\ + }else \ + {\ + uiCpuFlag = WelsCPUFeatureDetect (&iCpuCores); \ + }\ + InitMcFunc(&sMcFunc, uiCpuFlag); \ + uint8_t uSrcAnchor[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; \ + uint8_t uSrcTest[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; \ + ENFORCE_STACK_ALIGN_2D(uint8_t, uDstAnchor, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); \ + ENFORCE_STACK_ALIGN_2D(uint8_t, uDstTest, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); \ + for(int32_t j=0;j Date: Mon, 26 Jan 2015 13:44:18 +0200 Subject: [PATCH 08/13] Prepare the shared MC test code for adding an encoder MC test --- 
test/decoder/DecUT_MotionCompensation.cpp | 61 ++++---- test/mc_test_common.h | 176 +++++++++++----------- 2 files changed, 122 insertions(+), 115 deletions(-) diff --git a/test/decoder/DecUT_MotionCompensation.cpp b/test/decoder/DecUT_MotionCompensation.cpp index 4acf6597..af51b901 100644 --- a/test/decoder/DecUT_MotionCompensation.cpp +++ b/test/decoder/DecUT_MotionCompensation.cpp @@ -4,34 +4,41 @@ #include "cpu.h" using namespace WelsDec; -#define DEF_MCCOPYTESTS \ -DEF_MCCOPYTEST (2, 2, 1) \ -DEF_MCCOPYTEST (2, 4, 1) \ -DEF_MCCOPYTEST (4, 2, 0) \ -DEF_MCCOPYTEST (4, 4, 0) \ -DEF_MCCOPYTEST (4, 8, 0) \ -DEF_MCCOPYTEST (8, 4, 0) \ -DEF_MCCOPYTEST (8, 8, 0) \ -DEF_MCCOPYTEST (16, 8, 0) \ -DEF_MCCOPYTEST (8, 16, 0) \ -DEF_MCCOPYTEST (16, 16, 0) +#define LUMA_FUNC(funcs, src, srcstride, dst, dststride, mvx, mvy, width, height) \ + sMcFunc.pMcLumaFunc (src, srcstride, dst, dststride, mvx, mvy, width, height) -#define DEF_LUMA_MCTEST(a,b) \ -DEF_LUMA_MCTEST_SUBCASE(a,b,4,4) \ -DEF_LUMA_MCTEST_SUBCASE(a,b,4,8) \ -DEF_LUMA_MCTEST_SUBCASE(a,b,8,4) \ -DEF_LUMA_MCTEST_SUBCASE(a,b,8,8) \ -DEF_LUMA_MCTEST_SUBCASE(a,b,16,8) \ -DEF_LUMA_MCTEST_SUBCASE(a,b,8,16) \ -DEF_LUMA_MCTEST_SUBCASE(a,b,16,16) +#define CHROMA_FUNC sMcFunc.pMcChromaFunc -#define DEF_CHROMA_MCTEST(a,b) \ -DEF_CHROMA_MCTEST_SUBCASE(a,b,2,2) \ -DEF_CHROMA_MCTEST_SUBCASE(a,b,2,4) \ -DEF_CHROMA_MCTEST_SUBCASE(a,b,4,2) \ -DEF_CHROMA_MCTEST_SUBCASE(a,b,4,4) \ -DEF_CHROMA_MCTEST_SUBCASE(a,b,4,8) \ -DEF_CHROMA_MCTEST_SUBCASE(a,b,8,4) \ -DEF_CHROMA_MCTEST_SUBCASE(a,b,8,8) +#define PREFIX + +#define DEF_MCCOPYTESTS(pfx) \ +DEF_MCCOPYTEST (pfx, 2, 2, 1) \ +DEF_MCCOPYTEST (pfx, 2, 4, 1) \ +DEF_MCCOPYTEST (pfx, 4, 2, 0) \ +DEF_MCCOPYTEST (pfx, 4, 4, 0) \ +DEF_MCCOPYTEST (pfx, 4, 8, 0) \ +DEF_MCCOPYTEST (pfx, 8, 4, 0) \ +DEF_MCCOPYTEST (pfx, 8, 8, 0) \ +DEF_MCCOPYTEST (pfx, 16, 8, 0) \ +DEF_MCCOPYTEST (pfx, 8, 16, 0) \ +DEF_MCCOPYTEST (pfx, 16, 16, 0) + +#define DEF_LUMA_MCTEST(pfx,a,b) \ 
+DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,4,4) \ +DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,4,8) \ +DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,8,4) \ +DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,8,8) \ +DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,16,8) \ +DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,8,16) \ +DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,16,16) + +#define DEF_CHROMA_MCTEST(pfx,a,b) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,2,2) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,2,4) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,2) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,4) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,8) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,8,4) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,8,8) #include "mc_test_common.h" diff --git a/test/mc_test_common.h b/test/mc_test_common.h index 37c7c3e0..bf1ee6e6 100644 --- a/test/mc_test_common.h +++ b/test/mc_test_common.h @@ -108,8 +108,8 @@ static void MCChromaAnchor (uint8_t* pDstU, uint8_t* pDstV, int32_t iDstStride, } /**********************MC Unit Test OPENH264 Code Begin******************************/ -#define DEF_MCCOPYTEST(iW,iH, forceC) \ -TEST(McCopy_c,iW##x##iH) \ +#define DEF_MCCOPYTEST(pfx, iW,iH, forceC) \ +TEST(pfx##McCopy_c,iW##x##iH) \ { \ SMcFunc sMcFunc; \ int32_t iCpuCores = 1; \ @@ -138,7 +138,7 @@ TEST(McCopy_c,iW##x##iH) \ memset(uDstAnchor,0,sizeof(uint8_t)*MC_BUFF_HEIGHT*MC_BUFF_DST_STRIDE);\ memset(uDstTest,0,sizeof(uint8_t)*MC_BUFF_HEIGHT*MC_BUFF_DST_STRIDE); \ MCCopyAnchor(uSrcAnchor[0],MC_BUFF_SRC_STRIDE,uDstAnchor[0],MC_BUFF_DST_STRIDE,iW,iH); \ - sMcFunc.pMcLumaFunc(uSrcTest[0],MC_BUFF_SRC_STRIDE,uDstTest[0],MC_BUFF_DST_STRIDE,0,0,iW,iH); \ + LUMA_FUNC(&sMcFunc,uSrcTest[0],MC_BUFF_SRC_STRIDE,uDstTest[0],MC_BUFF_DST_STRIDE,0,0,iW,iH); \ for(int32_t j=0;j Date: Mon, 26 Jan 2015 11:14:57 +0200 Subject: [PATCH 09/13] Add unit tests for encoder MC --- .../win32/codec_ut/codec_unittest.vcproj | 4 ++ test/encoder/EncUT_MotionCompensation.cpp | 38 +++++++++++++++++++ test/encoder/targets.mk | 1 + 3 files changed, 43 insertions(+) create mode 100644 
test/encoder/EncUT_MotionCompensation.cpp diff --git a/test/build/win32/codec_ut/codec_unittest.vcproj b/test/build/win32/codec_ut/codec_unittest.vcproj index 37c8f29b..16199ffa 100644 --- a/test/build/win32/codec_ut/codec_unittest.vcproj +++ b/test/build/win32/codec_ut/codec_unittest.vcproj @@ -414,6 +414,10 @@ RelativePath="..\..\..\encoder\EncUT_MemoryZero.cpp" > + + diff --git a/test/encoder/EncUT_MotionCompensation.cpp b/test/encoder/EncUT_MotionCompensation.cpp new file mode 100644 index 00000000..255d7894 --- /dev/null +++ b/test/encoder/EncUT_MotionCompensation.cpp @@ -0,0 +1,38 @@ +#include <gtest/gtest.h> +#include "codec_def.h" +#include "mc.h" +#include "cpu.h" +using namespace WelsEnc; + +static void McLumaFunc (SMcFunc* pFuncs, const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, + int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { + uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03); + ASSERT_EQ (iWidth, 16); + pFuncs->pfLumaQuarpelMc[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight); +} + +#define InitMcFunc WelsInitMcFuncs + +#define LUMA_FUNC(funcs, src, srcstride, dst, dststride, mvx, mvy, width, height) \ + McLumaFunc (funcs, src, srcstride, dst, dststride, mvx, mvy, width, height) + +#define CHROMA_FUNC sMcFunc.pfChromaMc + +#define PREFIX Enc + +#define DEF_MCCOPYTESTS(pfx) \ +DEF_MCCOPYTEST (pfx, 16, 8, 0) \ +DEF_MCCOPYTEST (pfx, 16, 16, 0) + +#define DEF_LUMA_MCTEST(pfx,a,b) \ +DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,16,8) \ +DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,16,16) + +#define DEF_CHROMA_MCTEST(pfx,a,b) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,2) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,4) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,8) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,8,4) \ +DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,8,8) + +#include "mc_test_common.h" diff --git a/test/encoder/targets.mk b/test/encoder/targets.mk index 4f8c9cb4..87279552 100644 --- a/test/encoder/targets.mk +++ b/test/encoder/targets.mk @@ -10,6 +10,7 @@
ENCODER_UNITTEST_CPP_SRCS=\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_MBCopy.cpp\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryAlloc.cpp\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryZero.cpp\ + $(ENCODER_UNITTEST_SRCDIR)/EncUT_MotionCompensation.cpp\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_MotionEstimate.cpp\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_ParameterSetStrategy.cpp\ $(ENCODER_UNITTEST_SRCDIR)/EncUT_Reconstruct.cpp\ From ca97f78ae828617e877951b9f1341ac67974ce52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 26 Jan 2015 14:58:19 +0200 Subject: [PATCH 10/13] Add unit tests for special cased MC functions in the encoder --- test/encoder/EncUT_MotionCompensation.cpp | 81 +++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/test/encoder/EncUT_MotionCompensation.cpp b/test/encoder/EncUT_MotionCompensation.cpp index 255d7894..c5ca22a0 100644 --- a/test/encoder/EncUT_MotionCompensation.cpp +++ b/test/encoder/EncUT_MotionCompensation.cpp @@ -36,3 +36,84 @@ DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,8,4) \ DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,8,8) #include "mc_test_common.h" + +TEST (EncMcAvg, PixelAvg) { + SMcFunc sMcFunc; + for (int32_t k = 0; k < 2; k++) { + for (int32_t w = 0; w < 2; w++) { + int32_t width = 8 << w; + int32_t height = 16; + uint32_t uiCpuFlag = k == 0 ? 
0 : WelsCPUFeatureDetect (NULL); + WelsInitMcFuncs (&sMcFunc, uiCpuFlag); + uint8_t uSrc1[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; + uint8_t uSrc2[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; + ENFORCE_STACK_ALIGN_2D (uint8_t, uDstAnchor, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); + ENFORCE_STACK_ALIGN_2D (uint8_t, uDstTest, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); + for (int32_t j = 0; j < MC_BUFF_HEIGHT; j++) { + for (int32_t i = 0; i < MC_BUFF_SRC_STRIDE; i++) { + uSrc1[j][i] = rand() % 256; + uSrc2[j][i] = rand() % 256; + } + } + PixelAvgAnchor (uDstAnchor[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0], MC_BUFF_SRC_STRIDE, width, + height); + sMcFunc.pfSampleAveraging[w] (uDstTest[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0], + MC_BUFF_SRC_STRIDE, height); + for (int32_t j = 0; j < height; j++) { + for (int32_t i = 0; i < width; i++) { + ASSERT_EQ (uDstAnchor[j][i], uDstTest[j][i]); + } + } + } + } +} + +TEST (EncMcHalfpel, LumaHalfpel) { + SMcFunc sMcFunc; + for (int32_t k = 0; k < 2; k++) { + for (int32_t w = 0; w < 2; w++) { + int32_t width = 8 << w; + int32_t height = 16; + uint8_t uAnchor[4][MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; + uint8_t uSrcTest[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; + ENFORCE_STACK_ALIGN_2D (uint8_t, uDstTest, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16); + uint8_t* uAnchors[4]; + int16_t pBuf[MC_BUFF_DST_STRIDE]; + uAnchors[0] = &uAnchor[0][4][4]; + uAnchors[1] = &uAnchor[1][4][4]; + uAnchors[2] = &uAnchor[2][4][4]; + uAnchors[3] = &uAnchor[3][4][4]; + + memset (uAnchor, 0, 4 * sizeof (uint8_t)*MC_BUFF_HEIGHT * MC_BUFF_DST_STRIDE); + memset (uDstTest, 0, sizeof (uint8_t)*MC_BUFF_HEIGHT * MC_BUFF_DST_STRIDE); + for (int32_t j = 0; j < MC_BUFF_HEIGHT; j++) { + for (int32_t i = 0; i < MC_BUFF_SRC_STRIDE; i++) { + uAnchor[0][j][i] = uSrcTest[j][i] = rand() % 256; + } + } + + uint32_t uiCpuFlag = k == 0 ? 
0 : WelsCPUFeatureDetect (NULL); + WelsInitMcFuncs (&sMcFunc, uiCpuFlag); + + MCHalfPelFilterAnchor (uAnchors[1], uAnchors[2], uAnchors[3], uAnchors[0], MC_BUFF_SRC_STRIDE, width, height, pBuf + 4); + sMcFunc.pfLumaHalfpelHor (&uSrcTest[4][4], MC_BUFF_SRC_STRIDE, uDstTest[0], MC_BUFF_DST_STRIDE, width + 1, height); + for (int32_t j = 0; j < height; j++) { + for (int32_t i = 0; i < width; i++) { + ASSERT_EQ (uAnchor[1][4 + j][4 + i], uDstTest[j][i]); + } + } + sMcFunc.pfLumaHalfpelVer (&uSrcTest[4][4], MC_BUFF_SRC_STRIDE, uDstTest[0], MC_BUFF_DST_STRIDE, width, height + 1); + for (int32_t j = 0; j < height; j++) { + for (int32_t i = 0; i < width; i++) { + ASSERT_EQ (uAnchor[2][4 + j][4 + i], uDstTest[j][i]); + } + } + sMcFunc.pfLumaHalfpelCen (&uSrcTest[4][4], MC_BUFF_SRC_STRIDE, uDstTest[0], MC_BUFF_DST_STRIDE, width + 1, height + 1); + for (int32_t j = 0; j < height; j++) { + for (int32_t i = 0; i < width; i++) { + ASSERT_EQ (uAnchor[3][4 + j][4 + i], uDstTest[j][i]); + } + } + } + } +} From 10e2f90b7ea1b08b164e4185d864d536f7a245ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 26 Jan 2015 15:13:25 +0200 Subject: [PATCH 11/13] Bundle MC tests for all motion vectors in one single test This speeds up the compile time from 21.3 to 2.6 seconds for the MC test files. This makes it slightly harder to see exactly which test failed on a quick glance, but it makes the overall structure of the unit test output more manageable and readable, by reducing the number of tests from 1300 to 430. 
--- test/decoder/DecUT_MotionCompensation.cpp | 59 ++++++------ test/encoder/EncUT_MotionCompensation.cpp | 29 +++--- test/mc_test_common.h | 105 +++------------------- 3 files changed, 51 insertions(+), 142 deletions(-) diff --git a/test/decoder/DecUT_MotionCompensation.cpp b/test/decoder/DecUT_MotionCompensation.cpp index af51b901..b517c77c 100644 --- a/test/decoder/DecUT_MotionCompensation.cpp +++ b/test/decoder/DecUT_MotionCompensation.cpp @@ -9,36 +9,31 @@ using namespace WelsDec; #define CHROMA_FUNC sMcFunc.pMcChromaFunc -#define PREFIX - -#define DEF_MCCOPYTESTS(pfx) \ -DEF_MCCOPYTEST (pfx, 2, 2, 1) \ -DEF_MCCOPYTEST (pfx, 2, 4, 1) \ -DEF_MCCOPYTEST (pfx, 4, 2, 0) \ -DEF_MCCOPYTEST (pfx, 4, 4, 0) \ -DEF_MCCOPYTEST (pfx, 4, 8, 0) \ -DEF_MCCOPYTEST (pfx, 8, 4, 0) \ -DEF_MCCOPYTEST (pfx, 8, 8, 0) \ -DEF_MCCOPYTEST (pfx, 16, 8, 0) \ -DEF_MCCOPYTEST (pfx, 8, 16, 0) \ -DEF_MCCOPYTEST (pfx, 16, 16, 0) - -#define DEF_LUMA_MCTEST(pfx,a,b) \ -DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,4,4) \ -DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,4,8) \ -DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,8,4) \ -DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,8,8) \ -DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,16,8) \ -DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,8,16) \ -DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,16,16) - -#define DEF_CHROMA_MCTEST(pfx,a,b) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,2,2) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,2,4) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,2) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,4) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,8) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,8,4) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,8,8) - #include "mc_test_common.h" + +DEF_MCCOPYTEST (, 2, 2, 1) +DEF_MCCOPYTEST (, 2, 4, 1) +DEF_MCCOPYTEST (, 4, 2, 0) +DEF_MCCOPYTEST (, 4, 4, 0) +DEF_MCCOPYTEST (, 4, 8, 0) +DEF_MCCOPYTEST (, 8, 4, 0) +DEF_MCCOPYTEST (, 8, 8, 0) +DEF_MCCOPYTEST (, 16, 8, 0) +DEF_MCCOPYTEST (, 8, 16, 0) +DEF_MCCOPYTEST (, 16, 16, 0) + +DEF_LUMA_MCTEST (, 4, 4) +DEF_LUMA_MCTEST (, 4, 8) +DEF_LUMA_MCTEST (, 8, 4) +DEF_LUMA_MCTEST (, 8, 8) 
+DEF_LUMA_MCTEST (, 16, 8) +DEF_LUMA_MCTEST (, 8, 16) +DEF_LUMA_MCTEST (, 16, 16) + +DEF_CHROMA_MCTEST (, 2, 2) +DEF_CHROMA_MCTEST (, 2, 4) +DEF_CHROMA_MCTEST (, 4, 2) +DEF_CHROMA_MCTEST (, 4, 4) +DEF_CHROMA_MCTEST (, 4, 8) +DEF_CHROMA_MCTEST (, 8, 4) +DEF_CHROMA_MCTEST (, 8, 8) diff --git a/test/encoder/EncUT_MotionCompensation.cpp b/test/encoder/EncUT_MotionCompensation.cpp index c5ca22a0..a2e94ce1 100644 --- a/test/encoder/EncUT_MotionCompensation.cpp +++ b/test/encoder/EncUT_MotionCompensation.cpp @@ -18,25 +18,20 @@ static void McLumaFunc (SMcFunc* pFuncs, const uint8_t* pSrc, int32_t iSrcStride #define CHROMA_FUNC sMcFunc.pfChromaMc -#define PREFIX Enc - -#define DEF_MCCOPYTESTS(pfx) \ -DEF_MCCOPYTEST (pfx, 16, 8, 0) \ -DEF_MCCOPYTEST (pfx, 16, 16, 0) - -#define DEF_LUMA_MCTEST(pfx,a,b) \ -DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,16,8) \ -DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,16,16) - -#define DEF_CHROMA_MCTEST(pfx,a,b) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,2) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,4) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,4,8) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,8,4) \ -DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,8,8) - #include "mc_test_common.h" +DEF_MCCOPYTEST (Enc, 16, 8, 0) +DEF_MCCOPYTEST (Enc, 16, 16, 0) + +DEF_LUMA_MCTEST (Enc, 16, 8) +DEF_LUMA_MCTEST (Enc, 16, 16) + +DEF_CHROMA_MCTEST (Enc, 4, 2) +DEF_CHROMA_MCTEST (Enc, 4, 4) +DEF_CHROMA_MCTEST (Enc, 4, 8) +DEF_CHROMA_MCTEST (Enc, 8, 4) +DEF_CHROMA_MCTEST (Enc, 8, 8) + TEST (EncMcAvg, PixelAvg) { SMcFunc sMcFunc; for (int32_t k = 0; k < 2; k++) { diff --git a/test/mc_test_common.h b/test/mc_test_common.h index bf1ee6e6..12c85ff8 100644 --- a/test/mc_test_common.h +++ b/test/mc_test_common.h @@ -149,11 +149,11 @@ TEST(pfx##McCopy_c,iW##x##iH) \ }\ } -DEF_MCCOPYTESTS (PREFIX) - -#define DEF_LUMA_MCTEST_SUBCASE(pfx,a,b,iW,iH) \ -TEST(pfx##McHorVer##a##b##_c,iW##x##iH) \ +#define DEF_LUMA_MCTEST(pfx,iW,iH) \ +TEST(pfx##McHorVer,iW##x##iH) \ { \ + for (int32_t a = 0; a < 4; a++) { \ + for (int32_t b = 0; b < 4; 
b++) { \ SMcFunc sMcFunc; \ uint8_t uSrcAnchor[4][MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; \ uint8_t uSrcTest[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; \ @@ -197,27 +197,15 @@ TEST(pfx##McHorVer##a##b##_c,iW##x##iH) \ } \ } \ }\ + }\ + }\ } -DEF_LUMA_MCTEST (PREFIX, 0, 1) -DEF_LUMA_MCTEST (PREFIX, 0, 2) -DEF_LUMA_MCTEST (PREFIX, 0, 3) -DEF_LUMA_MCTEST (PREFIX, 1, 0) -DEF_LUMA_MCTEST (PREFIX, 1, 1) -DEF_LUMA_MCTEST (PREFIX, 1, 2) -DEF_LUMA_MCTEST (PREFIX, 1, 3) -DEF_LUMA_MCTEST (PREFIX, 2, 0) -DEF_LUMA_MCTEST (PREFIX, 2, 1) -DEF_LUMA_MCTEST (PREFIX, 2, 2) -DEF_LUMA_MCTEST (PREFIX, 2, 3) -DEF_LUMA_MCTEST (PREFIX, 3, 0) -DEF_LUMA_MCTEST (PREFIX, 3, 1) -DEF_LUMA_MCTEST (PREFIX, 3, 2) -DEF_LUMA_MCTEST (PREFIX, 3, 3) - -#define DEF_CHROMA_MCTEST_SUBCASE(pfx,a,b,iW,iH) \ -TEST(pfx##McChromaWithFragMv_##a##b##_c,iW##x##iH) \ +#define DEF_CHROMA_MCTEST(pfx,iW,iH) \ +TEST(pfx##McChroma,iW##x##iH) \ { \ + for (int32_t a = 0; a < 8; a++) { \ + for (int32_t b = 0; b < 8; b++) { \ SMcFunc sMcFunc; \ uint8_t uSrcAnchor[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE*2]; \ uint8_t uSrcTest[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE]; \ @@ -256,75 +244,6 @@ TEST(pfx##McChromaWithFragMv_##a##b##_c,iW##x##iH) \ } \ } \ }\ + }\ + }\ } - -DEF_CHROMA_MCTEST (PREFIX, 0, 1) -DEF_CHROMA_MCTEST (PREFIX, 0, 2) -DEF_CHROMA_MCTEST (PREFIX, 0, 3) -DEF_CHROMA_MCTEST (PREFIX, 0, 4) -DEF_CHROMA_MCTEST (PREFIX, 0, 5) -DEF_CHROMA_MCTEST (PREFIX, 0, 6) -DEF_CHROMA_MCTEST (PREFIX, 0, 7) - -DEF_CHROMA_MCTEST (PREFIX, 1, 0) -DEF_CHROMA_MCTEST (PREFIX, 1, 1) -DEF_CHROMA_MCTEST (PREFIX, 1, 2) -DEF_CHROMA_MCTEST (PREFIX, 1, 3) -DEF_CHROMA_MCTEST (PREFIX, 1, 4) -DEF_CHROMA_MCTEST (PREFIX, 1, 5) -DEF_CHROMA_MCTEST (PREFIX, 1, 6) -DEF_CHROMA_MCTEST (PREFIX, 1, 7) - -DEF_CHROMA_MCTEST (PREFIX, 2, 0) -DEF_CHROMA_MCTEST (PREFIX, 2, 1) -DEF_CHROMA_MCTEST (PREFIX, 2, 2) -DEF_CHROMA_MCTEST (PREFIX, 2, 3) -DEF_CHROMA_MCTEST (PREFIX, 2, 4) -DEF_CHROMA_MCTEST (PREFIX, 2, 5) -DEF_CHROMA_MCTEST (PREFIX, 2, 6) -DEF_CHROMA_MCTEST (PREFIX, 2, 7) - 
-DEF_CHROMA_MCTEST (PREFIX, 3, 0) -DEF_CHROMA_MCTEST (PREFIX, 3, 1) -DEF_CHROMA_MCTEST (PREFIX, 3, 2) -DEF_CHROMA_MCTEST (PREFIX, 3, 3) -DEF_CHROMA_MCTEST (PREFIX, 3, 4) -DEF_CHROMA_MCTEST (PREFIX, 3, 5) -DEF_CHROMA_MCTEST (PREFIX, 3, 6) -DEF_CHROMA_MCTEST (PREFIX, 3, 7) - -DEF_CHROMA_MCTEST (PREFIX, 4, 0) -DEF_CHROMA_MCTEST (PREFIX, 4, 1) -DEF_CHROMA_MCTEST (PREFIX, 4, 2) -DEF_CHROMA_MCTEST (PREFIX, 4, 3) -DEF_CHROMA_MCTEST (PREFIX, 4, 4) -DEF_CHROMA_MCTEST (PREFIX, 4, 5) -DEF_CHROMA_MCTEST (PREFIX, 4, 6) -DEF_CHROMA_MCTEST (PREFIX, 4, 7) - -DEF_CHROMA_MCTEST (PREFIX, 5, 0) -DEF_CHROMA_MCTEST (PREFIX, 5, 1) -DEF_CHROMA_MCTEST (PREFIX, 5, 2) -DEF_CHROMA_MCTEST (PREFIX, 5, 3) -DEF_CHROMA_MCTEST (PREFIX, 5, 4) -DEF_CHROMA_MCTEST (PREFIX, 5, 5) -DEF_CHROMA_MCTEST (PREFIX, 5, 6) -DEF_CHROMA_MCTEST (PREFIX, 5, 7) - -DEF_CHROMA_MCTEST (PREFIX, 6, 0) -DEF_CHROMA_MCTEST (PREFIX, 6, 1) -DEF_CHROMA_MCTEST (PREFIX, 6, 2) -DEF_CHROMA_MCTEST (PREFIX, 6, 3) -DEF_CHROMA_MCTEST (PREFIX, 6, 4) -DEF_CHROMA_MCTEST (PREFIX, 6, 5) -DEF_CHROMA_MCTEST (PREFIX, 6, 6) -DEF_CHROMA_MCTEST (PREFIX, 6, 7) - -DEF_CHROMA_MCTEST (PREFIX, 7, 0) -DEF_CHROMA_MCTEST (PREFIX, 7, 1) -DEF_CHROMA_MCTEST (PREFIX, 7, 2) -DEF_CHROMA_MCTEST (PREFIX, 7, 3) -DEF_CHROMA_MCTEST (PREFIX, 7, 4) -DEF_CHROMA_MCTEST (PREFIX, 7, 5) -DEF_CHROMA_MCTEST (PREFIX, 7, 6) -DEF_CHROMA_MCTEST (PREFIX, 7, 7) From 279e14b34e7cb752eb82c0720512e08555a53b88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 26 Jan 2015 22:54:58 +0200 Subject: [PATCH 12/13] Add const to some inline functions within MC --- codec/decoder/core/src/mc.cpp | 2 +- codec/encoder/core/src/mc.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/codec/decoder/core/src/mc.cpp b/codec/decoder/core/src/mc.cpp index bcc166be..3c634458 100644 --- a/codec/decoder/core/src/mc.cpp +++ b/codec/decoder/core/src/mc.cpp @@ -133,7 +133,7 @@ static inline void McCopyWidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, u 
//--------------------Luma sample MC------------------// -static inline int32_t HorFilterInput16bit_c (int16_t* pSrc) { +static inline int32_t HorFilterInput16bit_c (const int16_t* pSrc) { int32_t iPix05 = pSrc[-2] + pSrc[3]; int32_t iPix14 = pSrc[-1] + pSrc[2]; int32_t iPix23 = pSrc[ 0] + pSrc[1]; diff --git a/codec/encoder/core/src/mc.cpp b/codec/encoder/core/src/mc.cpp index 55b5f945..60b32564 100644 --- a/codec/encoder/core/src/mc.cpp +++ b/codec/encoder/core/src/mc.cpp @@ -125,7 +125,7 @@ static inline int32_t HorFilter_c (const uint8_t* pSrc) { return (iPix05 - ((iPix14 << 2) + iPix14) + (iPix23 << 4) + (iPix23 << 2)); } -static inline int32_t HorFilterInput16bit1_c (int16_t* pSrc) { +static inline int32_t HorFilterInput16bit1_c (const int16_t* pSrc) { int32_t iPix05 = pSrc[-2] + pSrc[3]; int32_t iPix14 = pSrc[-1] + pSrc[2]; int32_t iPix23 = pSrc[ 0] + pSrc[1]; From 23b20fb14cd7eb99fcefa9709517457fb3b5b3e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 26 Jan 2015 22:57:20 +0200 Subject: [PATCH 13/13] Simplify code in HorFilterInput16bit in MC This avoids a gcc optimizer bug (which seems to be present in some gcc 4.6 and 4.7 versions) at the -O3 level. 
--- codec/decoder/core/src/mc.cpp | 8 ++++---- codec/encoder/core/src/mc.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/codec/decoder/core/src/mc.cpp b/codec/decoder/core/src/mc.cpp index 3c634458..326002f4 100644 --- a/codec/decoder/core/src/mc.cpp +++ b/codec/decoder/core/src/mc.cpp @@ -134,9 +134,9 @@ static inline void McCopyWidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, u //--------------------Luma sample MC------------------// static inline int32_t HorFilterInput16bit_c (const int16_t* pSrc) { - int32_t iPix05 = pSrc[-2] + pSrc[3]; - int32_t iPix14 = pSrc[-1] + pSrc[2]; - int32_t iPix23 = pSrc[ 0] + pSrc[1]; + int32_t iPix05 = pSrc[0] + pSrc[5]; + int32_t iPix14 = pSrc[1] + pSrc[4]; + int32_t iPix23 = pSrc[2] + pSrc[3]; return (iPix05 - (iPix14 * 5) + (iPix23 * 20)); } @@ -213,7 +213,7 @@ static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_ iTmp[j] = FilterInput8bitWithStride_c (pSrc - 2 + j, iSrcStride); } for (k = 0; k < iWidth; k++) { - pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[2 + k]) + 512) >> 10); + pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[k]) + 512) >> 10); } pSrc += iSrcStride; pDst += iDstStride; diff --git a/codec/encoder/core/src/mc.cpp b/codec/encoder/core/src/mc.cpp index 60b32564..181a30ab 100644 --- a/codec/encoder/core/src/mc.cpp +++ b/codec/encoder/core/src/mc.cpp @@ -126,9 +126,9 @@ static inline int32_t HorFilter_c (const uint8_t* pSrc) { } static inline int32_t HorFilterInput16bit1_c (const int16_t* pSrc) { - int32_t iPix05 = pSrc[-2] + pSrc[3]; - int32_t iPix14 = pSrc[-1] + pSrc[2]; - int32_t iPix23 = pSrc[ 0] + pSrc[1]; + int32_t iPix05 = pSrc[0] + pSrc[5]; + int32_t iPix14 = pSrc[1] + pSrc[4]; + int32_t iPix23 = pSrc[2] + pSrc[3]; return (iPix05 - ((iPix14 << 2) + iPix14) + (iPix23 << 4) + (iPix23 << 2)); } @@ -203,7 +203,7 @@ static inline void McHorVer22WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStrid pTmp[j] = VerFilter_c (pSrc - 2 + j, 
iSrcStride); } for (k = 0; k < 16; k++) { - pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10); + pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10); } pSrc += iSrcStride; pDst += iDstStride; @@ -342,7 +342,7 @@ static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_ pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride); } for (k = 0; k < iWidth; k++) { - pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10); + pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10); } pSrc += iSrcStride; pDst += iDstStride;