Merge pull request #722 from sijchen/fme_merge75

[Encoder ME] add FME switch logic
This commit is contained in:
Licai Guo 2014-04-21 22:18:55 +08:00
commit fbc179fb82
4 changed files with 76 additions and 21 deletions

View File

@ -193,7 +193,7 @@ bool CheckDirectionalMvFalse(PSampleSadSatdCostFunc pSad, void * vpMe,
int32_t& iBestSadCost);
// Cross Search Basics
void LineFullSearch_c( void *pFunc, void *vpMe,
void LineFullSearch_c( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
@ -219,9 +219,12 @@ void HorizontalFullSearchUsingSSE41( void *pFunc, void *vpMe,
void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SDqLayer* pCurLayer, SWelsME * pMe, const SSlice* pSlice);
// Feature Search Basics
#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
#define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2
#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
#define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2
#define FME_DEFAULT_FEATURE_INDEX (0)
#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2)
#define FMESWITCH_MBSAD_THRESHOLD 30 // empirically set.
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
@ -236,10 +239,15 @@ int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeature
int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
SFeatureSearchPreparation* pFeatureSearchPreparation);
int32_t ReleaseFeatureSearchPreparation( CMemoryAlign *pMa, uint16_t*& pFeatureOfBlock);
#define FME_DEFAULT_GOOD_FRAME_NUM (2)
#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2)
#define FME_DEFAULT_FEATURE_INDEX (0)
void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
void UpdateFMESwitch(SDqLayer* pCurLayer);
void UpdateFMESwitchNull(SDqLayer* pCurLayer);
//inline functions
inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY,
const int32_t kiMaxMvRange,
@ -262,7 +270,8 @@ inline bool CheckMvInRange( const SMVUnitXY ksCurrentMv, const SMVUnitXY ksMinMv
}
//FME switch related
inline bool CalcFMESwitchFlag(const uint8_t uiFMEGoodFrameCount, const int32_t iHighFreMbPrecentage,
const int32_t iAvgMbSAD, const bool bScrollingDetected ) {
const int32_t iAvgMbSAD, const bool bScrollingDetected )
{
return ( bScrollingDetected ||( uiFMEGoodFrameCount>0 && iAvgMbSAD > FMESWITCH_MBSAD_THRESHOLD ) );
//TODO: add the logic of iHighFreMbPrecentage
//return ( iHighFreMbPrecentage > 2

View File

@ -150,6 +150,7 @@ typedef void (*PLineFullSearchFunc) ( void *pFunc, void *vpMe,
typedef void (*PCalculateBlockFeatureOfFrame)(uint8_t *pRef, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
typedef int32_t (*PCalculateSingleBlockFeature)(uint8_t *pRef, const int32_t kiRefStride);
typedef void (*PUpdateFMESwitch)(SDqLayer* pCurLayer);
#define MAX_BLOCK_TYPE 5 // prev 7
typedef struct TagSampleDealingFunc {
@ -209,10 +210,12 @@ struct TagWelsFuncPointerList {
PSearchMethodFunc pfSearchMethod[BLOCK_SIZE_ALL];
PCalculateSatdFunc pfCalculateSatd;
PCheckDirectionalMv pfCheckDirectionalMv;
PCalculateBlockFeatureOfFrame pfCalculateBlockFeatureOfFrame[2];//0 - for 8x8, 1 for 16x16
PCalculateSingleBlockFeature pfCalculateSingleBlockFeature[2];//0 - for 8x8, 1 for 16x16
PLineFullSearchFunc pfVerticalFullSearch;
PLineFullSearchFunc pfHorizontalFullSearch;
PUpdateFMESwitch pfUpdateFMESwitch;
PCopyFunc pfCopy16x16Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
PCopyFunc pfCopy16x16NotAligned; //md.c

View File

@ -2468,6 +2468,7 @@ void PreprocessSliceCoding (sWelsEncCtx* pCtx) {
if (pFeatureSearchPreparation->bFMESwitchFlag
&& !pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
pScreenBlockFeatureStorage->pFeatureOfBlockPointer = pFeatureSearchPreparation->pFeatureOfBlock;
//TODO: use ORIGIN of reference when preprocessing is ready
PerformFMEPreprocess( pFuncList, pCurLayer->pRefPic, pScreenBlockFeatureStorage );
}
@ -2475,10 +2476,17 @@ void PreprocessSliceCoding (sWelsEncCtx* pCtx) {
if (pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
//TBC int32_t iIs16x16 = pScreenBlockFeatureStorage->iIs16x16;
}
//assign UpdateFMESwitch pointer
if (pFeatureSearchPreparation->bFMESwitchFlag) {
pFuncList->pfUpdateFMESwitch = UpdateFMESwitch;
} else {
pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
}
} else {
//reset some status when at I_SLICE
pFeatureSearchPreparation->bFMESwitchFlag = true;
pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM;
pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
}
}
}

View File

@ -74,6 +74,11 @@ static inline void MeEndIntepelSearch( SWelsME * pMe )
void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent ) {
if (!bScreenContent) {
pFuncList->pfCheckDirectionalMv = CheckDirectionalMvFalse;
pFuncList->pfCalculateBlockFeatureOfFrame[0] =
pFuncList->pfCalculateBlockFeatureOfFrame[1] = NULL;
pFuncList->pfCalculateSingleBlockFeature[0] =
pFuncList->pfCalculateSingleBlockFeature[1] = NULL;
pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
} else {
pFuncList->pfCheckDirectionalMv = CheckDirectionalMv;
@ -87,6 +92,7 @@ void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScre
pFuncList->pfVerticalFullSearch = VerticalFullSearchUsingSSE41;
pFuncList->pfHorizontalFullSearch = HorizontalFullSearchUsingSSE41;
}
#endif
//for feature search
pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
@ -94,7 +100,7 @@ void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScre
//TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
#endif
pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
}
}
@ -547,7 +553,7 @@ int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFram
pFeatureSearchPreparation->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
pFeatureSearchPreparation->bFMESwitchFlag = true;
pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM;
pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
pFeatureSearchPreparation->iHighFreMbCount = 0;
return ENC_RETURN_SUCCESS;
@ -672,27 +678,22 @@ void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const
}
void InitializeHashforFeature_c( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList )
{
uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList ) {
//assign location pointer
uint16_t *pBufPos = pBuf;
for( int32_t i = 0 ; i < kiListSize; ++i )
{
for( int32_t i = 0 ; i < kiListSize; ++i ) {
pLocationOfFeature[i] =
pFeatureValuePointerList[i] = pBufPos;
pBufPos += (pTimesOfFeatureValue[i]<<1);
}
}
void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
uint16_t** pFeatureValuePointerList )
{
uint16_t** pFeatureValuePointerList ) {
//assign each pixel's position
uint16_t* pSrcPointer = pFeatureOfBlock;
int32_t iQpelY = 0;
for(int32_t y = 0; y < kiHeight; y++)
{
for(int32_t x = 0; x < kiWidth; x++)
{
for(int32_t y = 0; y < kiHeight; y++) {
for(int32_t x = 0; x < kiWidth; x++) {
uint16_t uiFeature = pSrcPointer[x];
ST32( &pFeatureValuePointerList[uiFeature][0], ((iQpelY<<16)|(x<<2)) );
pFeatureValuePointerList[uiFeature] += 2;
@ -703,8 +704,7 @@ void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t
}
void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
{
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
uint16_t* pFeatureOfBlock = pScreenBlockFeatureStorage->pFeatureOfBlockPointer;
uint32_t* pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue;
uint16_t** pLocationOfFeature = pScreenBlockFeatureStorage->pLocationOfFeature;
@ -867,6 +867,41 @@ void MotionEstimateFeatureFullSearch( SFeatureSearchIn &sFeatureSearchIn,
}
}
//switch related
// Accumulates the per-slice uiSliceFMECostDown counters of the given layer.
// The number of slices visited is whatever GetCurrentSliceNum() reports for
// the layer's slice-encoding context; a count below 1 yields 0.
// Returns the sum over slices [0, slice count) as an unsigned 32-bit value.
static uint32_t CountFMECostDown( const SDqLayer* pCurLayer ) {
  const int32_t kiSliceCount = GetCurrentSliceNum( pCurLayer->pSliceEncCtx );
  uint32_t uiTotalCostDown = 0;
  // An empty or non-positive slice count simply skips the loop.
  for ( int32_t iIdx = 0; iIdx < kiSliceCount; ++iIdx ) {
    uiTotalCostDown += pCurLayer->sLayerInfo.pSliceInLayer[iIdx].uiSliceFMECostDown;
  }
  return uiTotalCostDown;
}
#define FMESWITCH_MBAVERCOSTSAVING_THRESHOLD (2) //empirically set.
#define FMESWITCH_GOODFRAMECOUNT_MAX (5) //empirically set.
// Adjusts the "good frame" counter by at most one step per call.
//   iAvMBNormalizedRDcostDown : per-frame figure compared against
//                               FMESWITCH_MBAVERCOSTSAVING_THRESHOLD
//   uiFMEGoodFrameCount       : counter updated in place
// A figure strictly above the threshold increments the counter while it is
// below FMESWITCH_GOODFRAMECOUNT_MAX; any other figure decrements it while it
// is above 0. The strategy and both constants are empirical and may change.
static void UpdateFMEGoodFrameCount(const uint32_t iAvMBNormalizedRDcostDown, uint8_t& uiFMEGoodFrameCount) {
  if ( iAvMBNormalizedRDcostDown <= FMESWITCH_MBAVERCOSTSAVING_THRESHOLD ) {
    // Threshold not exceeded: step down, but never below zero.
    if ( uiFMEGoodFrameCount > 0 ) {
      -- uiFMEGoodFrameCount;
    }
    return;
  }
  // Threshold exceeded: step up, saturating at the maximum.
  if ( uiFMEGoodFrameCount < FMESWITCH_GOODFRAMECOUNT_MAX ) {
    ++ uiFMEGoodFrameCount;
  }
}
// Refreshes the layer's FME good-frame counter from the cost-down total of
// the current frame: the sum returned by CountFMECostDown() is divided by the
// layer's macroblock count (iMbWidth * iMbHeight) and the quotient is handed
// to UpdateFMEGoodFrameCount(), which updates
// pFeatureSearchPreparation->uiFMEGoodFrameCount in place.
// NOTE(review): assumes iMbWidth * iMbHeight is non-zero -- confirm at callers.
void UpdateFMESwitch(SDqLayer* pCurLayer) {
  const uint32_t kuiAvgMbCostDown =
      CountFMECostDown( pCurLayer ) / (pCurLayer->iMbWidth*pCurLayer->iMbHeight);
  UpdateFMEGoodFrameCount( kuiAvgMbCostDown,
                           pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount );
}
// No-op with the same signature as UpdateFMESwitch(): performs no update and
// leaves the layer untouched.
void UpdateFMESwitchNull(SDqLayer* pCurLayer) {
  (void) pCurLayer; // intentionally unused
}
/////////////////////////
// Search function options
/////////////////////////