diff --git a/codec/encoder/core/inc/picture.h b/codec/encoder/core/inc/picture.h index 86847a4b..d57ef8ed 100644 --- a/codec/encoder/core/inc/picture.h +++ b/codec/encoder/core/inc/picture.h @@ -51,11 +51,13 @@ typedef struct TagScreenBlockFeatureStorage typedef struct TagScreenContentStorage{ SScreenBlockFeatureStorage sRefBlockFeature[MAX_MULTI_REF_PIC_COUNT]; - bool bRefBlockFeatureCalculated; // flag of whether pre-process is done + uint32_t uiSadCostThreshold[BLOCK_SIZE_ALL]; + + bool bRefBlockFeatureCalculated; // flag of whether pre-process is done uint8_t uiFeatureStrategyIndex;// index of hash strategy /* for FME frame-level switch */ - bool bFMESwitchFlag; + bool bFMESwitchFlag; uint8_t uiFMEGoodFrameCount; int32_t iHighFreMbCount; }SScreenContentStorage; @@ -116,3 +118,4 @@ typedef struct TagPicture { } // end of namespace WelsSVCEnc { #endif//WELS_PICTURE_H__ + diff --git a/codec/encoder/core/inc/sample.h b/codec/encoder/core/inc/sample.h index 23bba3c2..b52e73b9 100644 --- a/codec/encoder/core/inc/sample.h +++ b/codec/encoder/core/inc/sample.h @@ -37,15 +37,6 @@ #include "wels_func_ptr_def.h" namespace WelsSVCEnc { -enum { - BLOCK_16x16 = 0, - BLOCK_16x8 = 1, - BLOCK_8x16 = 2, - BLOCK_8x8 = 3, - BLOCK_4x4 = 4, -// BLOCK_8x4 = 5, -// BLOCK_4x8 = 6, -}; //======================SATD======================// int32_t WelsSampleSatd16x16_c (uint8_t*, int32_t, uint8_t*, int32_t); diff --git a/codec/encoder/core/inc/slice.h b/codec/encoder/core/inc/slice.h index 092382e2..4772b44d 100644 --- a/codec/encoder/core/inc/slice.h +++ b/codec/encoder/core/inc/slice.h @@ -162,7 +162,6 @@ typedef struct TagSlice { /*******************************sSliceHeader****************************/ SSliceHeaderExt sSliceHeaderExt; - SMVUnitXY sMvStartMin; SMVUnitXY sMvStartMax; SMVUnitXY sMvc[5]; @@ -175,8 +174,12 @@ typedef struct TagSlice { bool bDynamicSlicingSliceSizeCtrlFlag; uint8_t uiAssumeLog2BytePerMb; + + uint32_t uiSliceFMECostDown;//TODO: for FME switch under MT, to opt
after ME final? + uint8_t uiReservedFillByte; // reserved to meet 4 bytes alignment } SSlice, *PSlice; } #endif//WELS_SLICE_H__ + diff --git a/codec/encoder/core/inc/wels_const.h b/codec/encoder/core/inc/wels_const.h index 5f14983e..1725cb48 100644 --- a/codec/encoder/core/inc/wels_const.h +++ b/codec/encoder/core/inc/wels_const.h @@ -173,6 +173,18 @@ #define MAX_NAL_UNIT_NUM_IN_AU 256 // predefined maximal number of NAL Units in an access unit #define MAX_ACCESS_UINT_CAPACITY (1<<20) // Maximal AU capacity in bytes: 1024 KB predefined #define MAX_ACCESS_UNIT_CACHE_NUM 2 // Maximal Access Unit(AU) cache number to be processed, denote current AU and the next coming AU. + +enum { + BLOCK_16x16 = 0, + BLOCK_16x8 = 1, + BLOCK_8x16 = 2, + BLOCK_8x8 = 3, + BLOCK_4x4 = 4, +// BLOCK_8x4 = 5, +// BLOCK_4x8 = 6, + BLOCK_SIZE_ALL = 5 +}; + enum { CUR_AU_IDX = 0, // index symbol for current access unit SUC_AU_IDX = 1 // index symbol for successive access unit diff --git a/codec/encoder/core/src/svc_base_layer_md.cpp b/codec/encoder/core/src/svc_base_layer_md.cpp index b7d26bf5..ffeb9a4d 100644 --- a/codec/encoder/core/src/svc_base_layer_md.cpp +++ b/codec/encoder/core/src/svc_base_layer_md.cpp @@ -41,7 +41,6 @@ #include "mv_pred.h" #include "svc_enc_golomb.h" #include "svc_base_layer_md.h" -#include "sample.h" #include "encoder.h" #include "svc_encode_mb.h" #include "svc_encode_slice.h" diff --git a/codec/encoder/core/src/svc_motion_estimate.cpp b/codec/encoder/core/src/svc_motion_estimate.cpp index a7b2dc41..be20fe4a 100644 --- a/codec/encoder/core/src/svc_motion_estimate.cpp +++ b/codec/encoder/core/src/svc_motion_estimate.cpp @@ -39,7 +39,6 @@ */ #include "cpu_core.h" -#include "sample.h" #include "svc_motion_estimate.h" namespace WelsSVCEnc { @@ -183,7 +182,7 @@ void NotCalculateSatdCost( PSampleSadSatdCostFunc pSatd, void * vpMe, ///////////////////////// -// Diamond Search Related +// Diamond Search Basics ///////////////////////// bool WelsMeSadCostSelect (int32_t*
iSadCost, const uint16_t* kpMvdCost, int32_t* pBestCost, const int32_t kiDx, const int32_t kiDy, int32_t* pIx, int32_t* pIy) { @@ -260,7 +259,7 @@ void WelsMotionEstimateIterativeSearch (SWelsFuncPtrList* pFuncList, SWelsME* pM } ///////////////////////// -// DirectionalMv Related +// DirectionalMv Basics ///////////////////////// bool CheckDirectionalMv(PSampleSadSatdCostFunc pSad, void * vpMe, const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride, @@ -291,7 +290,7 @@ bool CheckDirectionalMvFalse(PSampleSadSatdCostFunc pSad, void * vpMe, } ///////////////////////// -// Cross Search Related +// Cross Search Basics ///////////////////////// void VerticalFullSearchUsingSSE41( void *pFunc, void *vpMe, uint16_t* pMvdTable, const int32_t kiFixedMvd, @@ -368,7 +367,7 @@ void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SDqLayer* pCurLayer, SW } ///////////////////////// -// Feature Search Related +// Feature Search Basics ///////////////////////// void SetFeatureSearchIn( SWelsFuncPtrList *pFunc, const SWelsME& sMe, const SSlice *pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage, @@ -473,23 +472,63 @@ bool FeatureSearchOne( SFeatureSearchIn &sFeatureSearchIn, const int32_t iFeatur return (i < iSearchTimesx2); } - -void MotionEstimateFeatureFullSearchScc( SFeatureSearchIn &sFeatureSearchIn, - const uint32_t kiMaxSearchPoint, +void MotionEstimateFeatureFullSearch( SFeatureSearchIn &sFeatureSearchIn, + const uint32_t kuiMaxSearchPoint, SWelsME* pMe) { - SFeatureSearchOut sFeatureSearchOut = {0}; + SFeatureSearchOut sFeatureSearchOut = {0};//TODO: this can be refactored and removed sFeatureSearchOut.uiBestSadCost = pMe->uiSadCost; sFeatureSearchOut.sBestMv = pMe->sMv; sFeatureSearchOut.pBestRef = pMe->pRefMb; - FeatureSearchOne( sFeatureSearchIn, 0, kiMaxSearchPoint, &sFeatureSearchOut ); - if ( sFeatureSearchOut.uiBestSadCost < pMe->uiSadCost ) { + int32_t iFeatureDifference = 0;//TODO: change it according to
computational-complexity setting when needed + FeatureSearchOne( sFeatureSearchIn, iFeatureDifference, kuiMaxSearchPoint, &sFeatureSearchOut ); + if ( sFeatureSearchOut.uiBestSadCost < pMe->uiSadCost ) {//TODO: this may be refactored and removed UpdateMeResults(sFeatureSearchOut.sBestMv, sFeatureSearchOut.uiBestSadCost, sFeatureSearchOut.pBestRef, pMe); } } +///////////////////////// +// Search function option +///////////////////////// +void WelsDiamondCrossSearch(SWelsFuncPtrList *pFunc, void* vpLayer, void* vpMe, void* vpSlice) { + SDqLayer* pCurLayer = static_cast<SDqLayer*>(vpLayer); + SWelsME* pMe = static_cast<SWelsME*>(vpMe); + SSlice* pSlice = static_cast<SSlice*>(vpSlice); + // Step 1: diamond search + WelsMotionEstimateIterativeSearch(pFunc, pMe, pCurLayer->iEncStride[0], pCurLayer->pRefPic->iLineSize[0], pMe->pRefMb); + + // Step 2: CROSS search + SScreenContentStorage tmpScreenContentStorage = {0}; //TODO: use this structure from Ref; zero-initialized so the threshold read below is well-defined + pMe->uiSadCostThreshold = tmpScreenContentStorage.uiSadCostThreshold[pMe->uiBlockSize]; + if (pMe->uiSadCost >= pMe->uiSadCostThreshold) { + WelsMotionCrossSearch(pFunc, pCurLayer, pMe, pSlice); + } +} +void WelsDiamondCrossFeatureSearch(SWelsFuncPtrList *pFunc, void* vpLayer, void* vpMe, void* vpSlice) { + SDqLayer* pCurLayer = static_cast<SDqLayer*>(vpLayer); + SWelsME* pMe = static_cast<SWelsME*>(vpMe); + SSlice* pSlice = static_cast<SSlice*>(vpSlice); + + // Step 1: diamond search + cross + WelsDiamondCrossSearch(pFunc, pCurLayer, pMe, pSlice); + + // Step 2: FeatureSearch + if (pMe->uiSadCost >= pMe->uiSadCostThreshold) { + pSlice->uiSliceFMECostDown += pMe->uiSadCost; + + SScreenBlockFeatureStorage tmpScreenBlockFeatureStorage = {0}; //TODO: use this structure from Ref; zero-initialized placeholder until then + uint32_t uiMaxSearchPoint = INT_MAX;//TODO: change it according to computational-complexity setting + SFeatureSearchIn sFeatureSearchIn = {0}; + SetFeatureSearchIn(pFunc, *pMe, pSlice, &tmpScreenBlockFeatureStorage, + pCurLayer->iEncStride[0], pCurLayer->pRefPic->iLineSize[0], + &sFeatureSearchIn);
+ MotionEstimateFeatureFullSearch( sFeatureSearchIn, uiMaxSearchPoint, pMe); + + pSlice->uiSliceFMECostDown -= pMe->uiSadCost; + } +} } // namespace WelsSVCEnc