From c3cfce52234cff956e541896886c25d8e810ccba Mon Sep 17 00:00:00 2001 From: huili2 Date: Mon, 1 Jun 2015 15:11:20 +0800 Subject: [PATCH] modify some functions extending to sub8x8 usage, especially in ME part --- codec/common/inc/copy_mb.h | 4 +- codec/common/inc/sad_common.h | 6 +- codec/common/src/copy_mb.cpp | 10 ++- codec/common/src/sad_common.cpp | 26 +++++++ codec/encoder/core/inc/mb_cache.h | 2 +- codec/encoder/core/inc/md.h | 3 + codec/encoder/core/inc/wels_const.h | 6 +- codec/encoder/core/inc/wels_func_ptr_def.h | 6 +- codec/encoder/core/src/encode_mb_aux.cpp | 4 +- codec/encoder/core/src/encoder_ext.cpp | 4 +- codec/encoder/core/src/sample.cpp | 21 ++++++ codec/encoder/core/src/svc_encode_mb.cpp | 2 +- test/encoder/EncUT_EncoderMbAux.cpp | 4 +- test/encoder/EncUT_Sample.cpp | 81 ++++++++++++++++++++++ 14 files changed, 165 insertions(+), 14 deletions(-) diff --git a/codec/common/inc/copy_mb.h b/codec/common/inc/copy_mb.h index a28000d7..24ee1924 100644 --- a/codec/common/inc/copy_mb.h +++ b/codec/common/inc/copy_mb.h @@ -38,7 +38,9 @@ /**************************************************************************** * Copy functions ****************************************************************************/ -void WelsCopy4x4 (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy4x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy8x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy4x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); void WelsCopy8x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); void WelsCopy8x16_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); // void WelsCopy16x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); // diff --git a/codec/common/inc/sad_common.h b/codec/common/inc/sad_common.h index ef3e56f7..cdd5c4f2 100644 --- a/codec/common/inc/sad_common.h +++ b/codec/common/inc/sad_common.h @@ -41,8 +41,8 @@ int32_t WelsSampleSad16x16_c (uint8_t*, int32_t, uint8_t*, int32_t); int32_t WelsSampleSad16x8_c (uint8_t*, int32_t, uint8_t*, int32_t); int32_t WelsSampleSad8x16_c (uint8_t*, int32_t, uint8_t*, int32_t); int32_t WelsSampleSad8x8_c (uint8_t*, int32_t, uint8_t*, int32_t); -//int32_t WelsSampleSad8x4( uint8_t *, int32_t, uint8_t *, int32_t ); -//int32_t WelsSampleSad4x8( uint8_t *, int32_t, uint8_t *, int32_t ); +int32_t WelsSampleSad8x4_c( uint8_t *, int32_t, uint8_t *, int32_t ); +int32_t WelsSampleSad4x8_c( uint8_t *, int32_t, uint8_t *, int32_t ); int32_t WelsSampleSad4x4_c (uint8_t*, int32_t, uint8_t*, int32_t); @@ -52,6 +52,8 @@ void WelsSampleSadFour16x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSam void WelsSampleSadFour8x16_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); void WelsSampleSadFour8x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); void WelsSampleSadFour4x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); +void WelsSampleSadFour8x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); +void WelsSampleSadFour4x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad); #if defined(__cplusplus) extern "C" { diff --git a/codec/common/src/copy_mb.cpp b/codec/common/src/copy_mb.cpp index d13b4a93..219edb50 100644 --- a/codec/common/src/copy_mb.cpp +++ b/codec/common/src/copy_mb.cpp @@ -45,7 +45,7 @@ /**************************************************************************** * Copy functions ****************************************************************************/ -void WelsCopy4x4 (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { +void WelsCopy4x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { const int32_t kiSrcStride2 = iStrideS << 1; const int32_t kiSrcStride3 = iStrideS + kiSrcStride2; const int32_t kiDstStride2 = iStrideD << 1; @@ -56,6 +56,14 @@ void WelsCopy4x4 (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrid ST32 (pDst + kiDstStride2, LD32 (pSrc + kiSrcStride2)); ST32 (pDst + kiDstStride3, LD32 (pSrc + kiSrcStride3)); } +void WelsCopy8x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { + WelsCopy4x4_c (pDst, iStrideD, pSrc, iStrideS); + WelsCopy4x4_c (pDst + 4, iStrideD, pSrc + 4, iStrideS); +} +void WelsCopy4x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { + WelsCopy4x4_c (pDst, iStrideD, pSrc, iStrideS); + WelsCopy4x4_c (pDst + (iStrideD << 2), iStrideD, pSrc + (iStrideS << 2), iStrideS); +} void WelsCopy8x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) { int32_t i; for (i = 0; i < 4; i++) { diff --git a/codec/common/src/sad_common.cpp b/codec/common/src/sad_common.cpp index b634d08b..95383421 100644 --- a/codec/common/src/sad_common.cpp +++ b/codec/common/src/sad_common.cpp @@ -59,6 +59,20 @@ int32_t WelsSampleSad4x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSampl return iSadSum; } +int32_t WelsSampleSad8x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSadSum = 0; + iSadSum += WelsSampleSad4x4_c (pSample1, iStride1, pSample2, iStride2); + iSadSum += WelsSampleSad4x4_c (pSample1 + 4, iStride1, pSample2 + 4, iStride2); + return iSadSum; +} + +int32_t WelsSampleSad4x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSadSum = 0; + iSadSum += WelsSampleSad4x4_c (pSample1, iStride1, pSample2, iStride2); + iSadSum += WelsSampleSad4x4_c (pSample1 + (iStride1 << 2), iStride1, pSample2 + (iStride2 << 2), iStride2); + return iSadSum; +} + int32_t WelsSampleSad8x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { int32_t iSadSum = 0; int32_t i = 0; @@ -137,3 +151,15 @@ void WelsSampleSadFour4x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSamp * (pSad + 2) = WelsSampleSad4x4_c (iSample1, iStride1, (iSample2 - 1), iStride2); * (pSad + 3) = WelsSampleSad4x4_c (iSample1, iStride1, (iSample2 + 1), iStride2); } +void WelsSampleSadFour8x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) { + * (pSad) = WelsSampleSad8x4_c (iSample1, iStride1, (iSample2 - iStride2), iStride2); + * (pSad + 1) = WelsSampleSad8x4_c (iSample1, iStride1, (iSample2 + iStride2), iStride2); + * (pSad + 2) = WelsSampleSad8x4_c (iSample1, iStride1, (iSample2 - 1), iStride2); + * (pSad + 3) = WelsSampleSad8x4_c (iSample1, iStride1, (iSample2 + 1), iStride2); +} +void WelsSampleSadFour4x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) { + * (pSad) = WelsSampleSad4x8_c (iSample1, iStride1, (iSample2 - iStride2), iStride2); + * (pSad + 1) = WelsSampleSad4x8_c (iSample1, iStride1, (iSample2 + iStride2), iStride2); + * (pSad + 2) = WelsSampleSad4x8_c (iSample1, iStride1, (iSample2 - 1), iStride2); + * (pSad + 3) = WelsSampleSad4x8_c (iSample1, iStride1, (iSample2 + 1), iStride2); +} diff --git a/codec/encoder/core/inc/mb_cache.h b/codec/encoder/core/inc/mb_cache.h index 21a380d8..685e73ef 100644 --- a/codec/encoder/core/inc/mb_cache.h +++ b/codec/encoder/core/inc/mb_cache.h @@ -79,7 +79,7 @@ ALIGNED_DECLARE (int8_t, iIntraPredMode[48], 16); // must follow with iNonZeroCoeffCount! int32_t iSadCost[4]; //avail 1; unavail 0 -SMVUnitXY sMbMvp[MB_BLOCK8x8_NUM];// for write bs +SMVUnitXY sMbMvp[MB_BLOCK4x4_NUM];// for write bs //for residual decoding (recovery) at the side of Encoder int16_t* pCoeffLevel; // tmep diff --git a/codec/encoder/core/inc/md.h b/codec/encoder/core/inc/md.h index 7230a6b6..eca89d12 100644 --- a/codec/encoder/core/inc/md.h +++ b/codec/encoder/core/inc/md.h @@ -109,6 +109,9 @@ struct { SWelsME sMe8x8[4]; SWelsME sMe16x8[2]; SWelsME sMe8x16[2]; + SWelsME sMe4x4[4][4]; + SWelsME sMe8x4[4][2]; + SWelsME sMe4x8[4][2]; // SMVUnitXY i_mvbs[MB_BLOCK8x8_NUM]; //scaled MVB } sMe; diff --git a/codec/encoder/core/inc/wels_const.h b/codec/encoder/core/inc/wels_const.h index 3e2539c4..57d7b212 100644 --- a/codec/encoder/core/inc/wels_const.h +++ b/codec/encoder/core/inc/wels_const.h @@ -142,9 +142,9 @@ BLOCK_16x8 = 1, BLOCK_8x16 = 2, BLOCK_8x8 = 3, BLOCK_4x4 = 4, -// BLOCK_8x4 = 5, -// BLOCK_4x8 = 6, -BLOCK_SIZE_ALL = 5 +BLOCK_8x4 = 5, +BLOCK_4x8 = 6, +BLOCK_SIZE_ALL = 7 }; typedef enum { diff --git a/codec/encoder/core/inc/wels_func_ptr_def.h b/codec/encoder/core/inc/wels_func_ptr_def.h index a5917927..2238fd56 100644 --- a/codec/encoder/core/inc/wels_func_ptr_def.h +++ b/codec/encoder/core/inc/wels_func_ptr_def.h @@ -155,7 +155,7 @@ typedef void (*PCalculateBlockFeatureOfFrame) (uint8_t* pRef, const int32_t kiWi typedef int32_t (*PCalculateSingleBlockFeature) (uint8_t* pRef, const int32_t kiRefStride); typedef void (*PUpdateFMESwitch) (SDqLayer* pCurLayer); -#define MAX_BLOCK_TYPE 5 // prev 7 +#define MAX_BLOCK_TYPE BLOCK_SIZE_ALL typedef struct TagSampleDealingFunc { PSampleSadSatdCostFunc pfSampleSad[MAX_BLOCK_TYPE]; PSampleSadSatdCostFunc pfSampleSatd[MAX_BLOCK_TYPE]; @@ -235,8 +235,10 @@ struct TagWelsFuncPointerList { PCopyFunc pfCopy8x8Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c md.c PCopyFunc pfCopy16x8NotAligned; //for MeRefineFracPixel 16x8 based PCopyFunc pfCopy8x16Aligned; //for MeRefineFracPixel 8x16 based + PCopyFunc pfCopy4x4; //not sure if aligned or not, need further tune + PCopyFunc pfCopy8x4; //not sure if aligned or not, need further tune + PCopyFunc pfCopy4x8; //not sure if aligned or not, need further tune - //svc_encode_mb.c encode_mb_aux.c PDctFunc pfDctT4; PDctFunc pfDctFourT4; diff --git a/codec/encoder/core/src/encode_mb_aux.cpp b/codec/encoder/core/src/encode_mb_aux.cpp index d7b9f78a..d448fe8a 100644 --- a/codec/encoder/core/src/encode_mb_aux.cpp +++ b/codec/encoder/core/src/encode_mb_aux.cpp @@ -467,7 +467,9 @@ void WelsInitEncodingFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) { pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_c; pFuncList->pfCopy16x8NotAligned = WelsCopy16x8_c; pFuncList->pfCopy8x16Aligned = WelsCopy8x16_c; - + pFuncList->pfCopy4x4 = WelsCopy4x4_c; + pFuncList->pfCopy8x4 = WelsCopy8x4_c; + pFuncList->pfCopy4x8 = WelsCopy4x8_c; pFuncList->pfQuantizationHadamard2x2 = WelsHadamardQuant2x2_c; pFuncList->pfQuantizationHadamard2x2Skip = WelsHadamardQuant2x2Skip_c; pFuncList->pfTransformHadamard4x4Dc = WelsHadamardT4Dc_c; diff --git a/codec/encoder/core/src/encoder_ext.cpp b/codec/encoder/core/src/encoder_ext.cpp index bf31c31c..33d116b9 100644 --- a/codec/encoder/core/src/encoder_ext.cpp +++ b/codec/encoder/core/src/encoder_ext.cpp @@ -2983,7 +2983,9 @@ void PreprocessSliceCoding (sWelsEncCtx* pCtx) { pFuncList->pfSearchMethod[BLOCK_16x8] = pFuncList->pfSearchMethod[BLOCK_8x16] = pFuncList->pfSearchMethod[BLOCK_8x8] = - pFuncList->pfSearchMethod[BLOCK_4x4] = WelsDiamondSearch; + pFuncList->pfSearchMethod[BLOCK_4x4] = + pFuncList->pfSearchMethod[BLOCK_8x4] = + pFuncList->pfSearchMethod[BLOCK_4x8] = WelsDiamondSearch; pFuncList->pfFirstIntraMode = WelsMdFirstIntraMode; pFuncList->sSampleDealingFuncs.pfMeCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSatd; pFuncList->pfSetScrollingMv = SetScrollingMvToMdNull; diff --git a/codec/encoder/core/src/sample.cpp b/codec/encoder/core/src/sample.cpp index 7b5dc07b..80c6c6e2 100644 --- a/codec/encoder/core/src/sample.cpp +++ b/codec/encoder/core/src/sample.cpp @@ -95,6 +95,21 @@ int32_t WelsSampleSatd4x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSamp return ((iSatdSum + 1) >> 1); } + +int32_t WelsSampleSatd8x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSatdSum = 0; + iSatdSum += WelsSampleSatd4x4_c (pSample1, iStride1, pSample2, iStride2); + iSatdSum += WelsSampleSatd4x4_c (pSample1 + 4, iStride1, pSample2 + 4, iStride2); + return iSatdSum; +} + +int32_t WelsSampleSatd4x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { + int32_t iSatdSum = 0; + iSatdSum += WelsSampleSatd4x4_c (pSample1, iStride1, pSample2, iStride2); + iSatdSum += WelsSampleSatd4x4_c (pSample1 + (iStride1 << 2), iStride1, pSample2 + (iStride2 << 2), iStride2); + return iSatdSum; +} + int32_t WelsSampleSatd8x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) { int32_t iSatdSum = 0; @@ -325,6 +340,8 @@ void WelsInitSampleSadFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) { pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16 ] = WelsSampleSad8x16_c; pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8 ] = WelsSampleSad8x8_c; pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4 ] = WelsSampleSad4x4_c; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x4 ] = WelsSampleSad8x4_c; + pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x8 ] = WelsSampleSad4x8_c; //pfSampleSatd init pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_c; @@ -332,12 +349,16 @@ void WelsInitSampleSadFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) { pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_c; pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8 ] = WelsSampleSatd8x8_c; pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4 ] = WelsSampleSatd4x4_c; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x4 ] = WelsSampleSatd8x4_c; + pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x8 ] = WelsSampleSatd4x8_c; pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_c; pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_c; pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_c; pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_c; pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x4] = WelsSampleSadFour4x4_c; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x4] = WelsSampleSadFour8x4_c; + pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x8] = WelsSampleSadFour4x8_c; pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd = NULL; pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd = NULL; diff --git a/codec/encoder/core/src/svc_encode_mb.cpp b/codec/encoder/core/src/svc_encode_mb.cpp index a3493656..86689ce3 100644 --- a/codec/encoder/core/src/svc_encode_mb.cpp +++ b/codec/encoder/core/src/svc_encode_mb.cpp @@ -174,7 +174,7 @@ void WelsEncRecI4x4Y (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, uin pFuncList->pfDequantization4x4 (pResI4x4, g_kuiDequantCoeff[uiQp]); pFuncList->pfIDctT4 (pPredI4x4, iRecStride, pBestPred, 4, pResI4x4); } else - WelsCopy4x4 (pPredI4x4, iRecStride, pBestPred, 4); + pFuncList->pfCopy4x4 (pPredI4x4, iRecStride, pBestPred, 4); } void WelsEncInterY (SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache) { diff --git a/test/encoder/EncUT_EncoderMbAux.cpp b/test/encoder/EncUT_EncoderMbAux.cpp index c548f8b2..4388af70 100644 --- a/test/encoder/EncUT_EncoderMbAux.cpp +++ b/test/encoder/EncUT_EncoderMbAux.cpp @@ -241,7 +241,9 @@ TEST(EncodeMbAuxTest, function) { \ EXPECT_EQ(ref_dst[i*iDStride+j], dst[i*iDStride+j]); \ } -GENERATE_UT_FOR_COPY (4, 4, WelsCopy4x4); +GENERATE_UT_FOR_COPY (4, 4, WelsCopy4x4_c); +GENERATE_UT_FOR_COPY (8, 4, WelsCopy8x4_c); +GENERATE_UT_FOR_COPY (4, 8, WelsCopy4x8_c); GENERATE_UT_FOR_COPY (8, 8, WelsCopy8x8_c); GENERATE_UT_FOR_COPY (8, 16, WelsCopy8x16_c); GENERATE_UT_FOR_COPY (16, 8, WelsCopy16x8_c); diff --git a/test/encoder/EncUT_Sample.cpp b/test/encoder/EncUT_Sample.cpp index ca954a38..1490b731 100644 --- a/test/encoder/EncUT_Sample.cpp +++ b/test/encoder/EncUT_Sample.cpp @@ -188,6 +188,42 @@ TEST_F (SadSatdCFuncTest, WelsSampleSad4x4_c) { EXPECT_EQ (WelsSampleSad4x4_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iSumSad); } +TEST_F (SadSatdCFuncTest, WelsSampleSad8x4_c) { + for (int i = 0; i < (m_iStrideA << 2); i++) + m_pPixSrcA[i] = rand() % 256; + for (int i = 0; i < (m_iStrideB << 2); i++) + m_pPixSrcB[i] = rand() % 256; + uint8_t* pPixA = m_pPixSrcA; + uint8_t* pPixB = m_pPixSrcB; + + int32_t iSumSad = 0; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 8; j++) + iSumSad += abs (pPixA[j] - pPixB[j]); + pPixA += m_iStrideA; + pPixB += m_iStrideB; + } + EXPECT_EQ (WelsSampleSad8x4_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iSumSad); +} + +TEST_F (SadSatdCFuncTest, WelsSampleSad4x8_c) { + for (int i = 0; i < (m_iStrideA << 2); i++) + m_pPixSrcA[i] = rand() % 256; + for (int i = 0; i < (m_iStrideB << 2); i++) + m_pPixSrcB[i] = rand() % 256; + uint8_t* pPixA = m_pPixSrcA; + uint8_t* pPixB = m_pPixSrcB; + + int32_t iSumSad = 0; + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 4; j++) + iSumSad += abs (pPixA[j] - pPixB[j]); + pPixA += m_iStrideA; + pPixB += m_iStrideB; + } + EXPECT_EQ (WelsSampleSad4x8_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iSumSad); +} + TEST_F (SadSatdCFuncTest, WelsSampleSad8x8_c) { for (int i = 0; i < (m_iStrideA << 3); i++) m_pPixSrcA[i] = rand() % 256; @@ -444,6 +480,51 @@ TEST_F (SadSatdCFuncTest, WelsSampleSadFour4x4_c) { EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad); } +TEST_F (SadSatdCFuncTest, WelsSampleSadFour8x4_c) { + for (int i = 0; i < (m_iStrideA << 3); i++) + m_pPixSrcA[i] = rand() % 256; + for (int i = 0; i < (m_iStrideB << 3); i++) + m_pPixSrcB[i] = rand() % 256; + uint8_t* pPixA = m_pPixSrcA; + uint8_t* pPixB = m_pPixSrcB + m_iStrideB; + + int32_t iSumSad = 0; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 8; j++) { + iSumSad += abs (pPixA[j] - pPixB[j - 1]); + iSumSad += abs (pPixA[j] - pPixB[j + 1]); + iSumSad += abs (pPixA[j] - pPixB[j - m_iStrideB]); + iSumSad += abs (pPixA[j] - pPixB[j + m_iStrideB]); + } + pPixA += m_iStrideA; + pPixB += m_iStrideB; + } + WelsSampleSadFour8x4_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB + m_iStrideB, m_iStrideB, m_pSad); + EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad); +} + +TEST_F (SadSatdCFuncTest, WelsSampleSadFour4x8_c) { + for (int i = 0; i < (m_iStrideA << 3); i++) + m_pPixSrcA[i] = rand() % 256; + for (int i = 0; i < (m_iStrideB << 3); i++) + m_pPixSrcB[i] = rand() % 256; + uint8_t* pPixA = m_pPixSrcA; + uint8_t* pPixB = m_pPixSrcB + m_iStrideB; + + int32_t iSumSad = 0; + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 4; j++) { + iSumSad += abs (pPixA[j] - pPixB[j - 1]); + iSumSad += abs (pPixA[j] - pPixB[j + 1]); + iSumSad += abs (pPixA[j] - pPixB[j - m_iStrideB]); + iSumSad += abs (pPixA[j] - pPixB[j + m_iStrideB]); + } + pPixA += m_iStrideA; + pPixB += m_iStrideB; + } + WelsSampleSadFour4x8_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB + m_iStrideB, m_iStrideB, m_pSad); + EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad); +} class SadSatdAssemblyFuncTest : public testing::Test { public: