add FME switch logic

This commit is contained in:
sijchen
2014-04-21 16:36:59 +08:00
parent 3f2ea77908
commit e5001c87ca
4 changed files with 76 additions and 21 deletions

View File

@@ -193,7 +193,7 @@ bool CheckDirectionalMvFalse(PSampleSadSatdCostFunc pSad, void * vpMe,
int32_t& iBestSadCost); int32_t& iBestSadCost);
// Cross Search Basics // Cross Search Basics
void LineFullSearch_c( void *pFunc, void *vpMe, void LineFullSearch_c( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd, uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride, const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos, const int32_t kiMinPos, const int32_t kiMaxPos,
@@ -219,9 +219,12 @@ void HorizontalFullSearchUsingSSE41( void *pFunc, void *vpMe,
void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SDqLayer* pCurLayer, SWelsME * pMe, const SSlice* pSlice); void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SDqLayer* pCurLayer, SWelsME * pMe, const SSlice* pSlice);
// Feature Search Basics // Feature Search Basics
#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1) #define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1) #define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
#define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2 #define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2
#define FME_DEFAULT_FEATURE_INDEX (0)
#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2)
#define FMESWITCH_MBSAD_THRESHOLD 30 // empirically set. #define FMESWITCH_MBSAD_THRESHOLD 30 // empirically set.
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride); int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
@@ -236,10 +239,15 @@ int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeature
int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage, int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
SFeatureSearchPreparation* pFeatureSearchPreparation); SFeatureSearchPreparation* pFeatureSearchPreparation);
int32_t ReleaseFeatureSearchPreparation( CMemoryAlign *pMa, uint16_t*& pFeatureOfBlock); int32_t ReleaseFeatureSearchPreparation( CMemoryAlign *pMa, uint16_t*& pFeatureOfBlock);
#define FME_DEFAULT_GOOD_FRAME_NUM (2)
#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2)
#define FME_DEFAULT_FEATURE_INDEX (0) #define FME_DEFAULT_FEATURE_INDEX (0)
void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef, void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage); SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
void UpdateFMESwitch(SDqLayer* pCurLayer);
void UpdateFMESwitchNull(SDqLayer* pCurLayer);
//inline functions //inline functions
inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY, inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY,
const int32_t kiMaxMvRange, const int32_t kiMaxMvRange,
@@ -262,7 +270,8 @@ inline bool CheckMvInRange( const SMVUnitXY ksCurrentMv, const SMVUnitXY ksMinMv
} }
//FME switch related //FME switch related
inline bool CalcFMESwitchFlag(const uint8_t uiFMEGoodFrameCount, const int32_t iHighFreMbPrecentage, inline bool CalcFMESwitchFlag(const uint8_t uiFMEGoodFrameCount, const int32_t iHighFreMbPrecentage,
const int32_t iAvgMbSAD, const bool bScrollingDetected ) { const int32_t iAvgMbSAD, const bool bScrollingDetected )
{
return ( bScrollingDetected ||( uiFMEGoodFrameCount>0 && iAvgMbSAD > FMESWITCH_MBSAD_THRESHOLD ) ); return ( bScrollingDetected ||( uiFMEGoodFrameCount>0 && iAvgMbSAD > FMESWITCH_MBSAD_THRESHOLD ) );
//TODO: add the logic of iHighFreMbPrecentage //TODO: add the logic of iHighFreMbPrecentage
//return ( iHighFreMbPrecentage > 2 //return ( iHighFreMbPrecentage > 2

View File

@@ -150,6 +150,7 @@ typedef void (*PLineFullSearchFunc) ( void *pFunc, void *vpMe,
typedef void (*PCalculateBlockFeatureOfFrame)(uint8_t *pRef, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, typedef void (*PCalculateBlockFeatureOfFrame)(uint8_t *pRef, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
typedef int32_t (*PCalculateSingleBlockFeature)(uint8_t *pRef, const int32_t kiRefStride); typedef int32_t (*PCalculateSingleBlockFeature)(uint8_t *pRef, const int32_t kiRefStride);
typedef void (*PUpdateFMESwitch)(SDqLayer* pCurLayer);
#define MAX_BLOCK_TYPE 5 // prev 7 #define MAX_BLOCK_TYPE 5 // prev 7
typedef struct TagSampleDealingFunc { typedef struct TagSampleDealingFunc {
@@ -209,10 +210,12 @@ struct TagWelsFuncPointerList {
PSearchMethodFunc pfSearchMethod[BLOCK_SIZE_ALL]; PSearchMethodFunc pfSearchMethod[BLOCK_SIZE_ALL];
PCalculateSatdFunc pfCalculateSatd; PCalculateSatdFunc pfCalculateSatd;
PCheckDirectionalMv pfCheckDirectionalMv; PCheckDirectionalMv pfCheckDirectionalMv;
PCalculateBlockFeatureOfFrame pfCalculateBlockFeatureOfFrame[2];//0 - for 8x8, 1 for 16x16 PCalculateBlockFeatureOfFrame pfCalculateBlockFeatureOfFrame[2];//0 - for 8x8, 1 for 16x16
PCalculateSingleBlockFeature pfCalculateSingleBlockFeature[2];//0 - for 8x8, 1 for 16x16 PCalculateSingleBlockFeature pfCalculateSingleBlockFeature[2];//0 - for 8x8, 1 for 16x16
PLineFullSearchFunc pfVerticalFullSearch; PLineFullSearchFunc pfVerticalFullSearch;
PLineFullSearchFunc pfHorizontalFullSearch; PLineFullSearchFunc pfHorizontalFullSearch;
PUpdateFMESwitch pfUpdateFMESwitch;
PCopyFunc pfCopy16x16Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c PCopyFunc pfCopy16x16Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
PCopyFunc pfCopy16x16NotAligned; //md.c PCopyFunc pfCopy16x16NotAligned; //md.c

View File

@@ -2468,6 +2468,7 @@ void PreprocessSliceCoding (sWelsEncCtx* pCtx) {
if (pFeatureSearchPreparation->bFMESwitchFlag if (pFeatureSearchPreparation->bFMESwitchFlag
&& !pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) { && !pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
pScreenBlockFeatureStorage->pFeatureOfBlockPointer = pFeatureSearchPreparation->pFeatureOfBlock; pScreenBlockFeatureStorage->pFeatureOfBlockPointer = pFeatureSearchPreparation->pFeatureOfBlock;
//TODO: use ORIGIN of reference when preprocessing is ready
PerformFMEPreprocess( pFuncList, pCurLayer->pRefPic, pScreenBlockFeatureStorage ); PerformFMEPreprocess( pFuncList, pCurLayer->pRefPic, pScreenBlockFeatureStorage );
} }
@@ -2475,10 +2476,17 @@ void PreprocessSliceCoding (sWelsEncCtx* pCtx) {
if (pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) { if (pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
//TBC int32_t iIs16x16 = pScreenBlockFeatureStorage->iIs16x16; //TBC int32_t iIs16x16 = pScreenBlockFeatureStorage->iIs16x16;
} }
//assign UpdateFMESwitch pointer
if (pFeatureSearchPreparation->bFMESwitchFlag) {
pFuncList->pfUpdateFMESwitch = UpdateFMESwitch;
} else {
pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
}
} else { } else {
//reset some status when at I_SLICE //reset some status when at I_SLICE
pFeatureSearchPreparation->bFMESwitchFlag = true; pFeatureSearchPreparation->bFMESwitchFlag = true;
pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM; pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
} }
} }
} }

View File

@@ -74,6 +74,11 @@ static inline void MeEndIntepelSearch( SWelsME * pMe )
void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent ) { void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent ) {
if (!bScreenContent) { if (!bScreenContent) {
pFuncList->pfCheckDirectionalMv = CheckDirectionalMvFalse; pFuncList->pfCheckDirectionalMv = CheckDirectionalMvFalse;
pFuncList->pfCalculateBlockFeatureOfFrame[0] =
pFuncList->pfCalculateBlockFeatureOfFrame[1] = NULL;
pFuncList->pfCalculateSingleBlockFeature[0] =
pFuncList->pfCalculateSingleBlockFeature[1] = NULL;
pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
} else { } else {
pFuncList->pfCheckDirectionalMv = CheckDirectionalMv; pFuncList->pfCheckDirectionalMv = CheckDirectionalMv;
@@ -87,6 +92,7 @@ void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScre
pFuncList->pfVerticalFullSearch = VerticalFullSearchUsingSSE41; pFuncList->pfVerticalFullSearch = VerticalFullSearchUsingSSE41;
pFuncList->pfHorizontalFullSearch = HorizontalFullSearchUsingSSE41; pFuncList->pfHorizontalFullSearch = HorizontalFullSearchUsingSSE41;
} }
#endif
//for feature search //for feature search
pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c; pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
@@ -94,7 +100,7 @@ void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScre
//TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8? //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c; pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c; pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
#endif pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
} }
} }
@@ -547,7 +553,7 @@ int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFram
pFeatureSearchPreparation->uiFeatureStrategyIndex = kiFeatureStrategyIndex; pFeatureSearchPreparation->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
pFeatureSearchPreparation->bFMESwitchFlag = true; pFeatureSearchPreparation->bFMESwitchFlag = true;
pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM; pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
pFeatureSearchPreparation->iHighFreMbCount = 0; pFeatureSearchPreparation->iHighFreMbCount = 0;
return ENC_RETURN_SUCCESS; return ENC_RETURN_SUCCESS;
@@ -672,27 +678,22 @@ void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const
} }
void InitializeHashforFeature_c( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, void InitializeHashforFeature_c( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList ) uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList ) {
{
//assign location pointer //assign location pointer
uint16_t *pBufPos = pBuf; uint16_t *pBufPos = pBuf;
for( int32_t i = 0 ; i < kiListSize; ++i ) for( int32_t i = 0 ; i < kiListSize; ++i ) {
{
pLocationOfFeature[i] = pLocationOfFeature[i] =
pFeatureValuePointerList[i] = pBufPos; pFeatureValuePointerList[i] = pBufPos;
pBufPos += (pTimesOfFeatureValue[i]<<1); pBufPos += (pTimesOfFeatureValue[i]<<1);
} }
} }
void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
uint16_t** pFeatureValuePointerList ) uint16_t** pFeatureValuePointerList ) {
{
//assign each pixel's position //assign each pixel's position
uint16_t* pSrcPointer = pFeatureOfBlock; uint16_t* pSrcPointer = pFeatureOfBlock;
int32_t iQpelY = 0; int32_t iQpelY = 0;
for(int32_t y = 0; y < kiHeight; y++) for(int32_t y = 0; y < kiHeight; y++) {
{ for(int32_t x = 0; x < kiWidth; x++) {
for(int32_t x = 0; x < kiWidth; x++)
{
uint16_t uiFeature = pSrcPointer[x]; uint16_t uiFeature = pSrcPointer[x];
ST32( &pFeatureValuePointerList[uiFeature][0], ((iQpelY<<16)|(x<<2)) ); ST32( &pFeatureValuePointerList[uiFeature][0], ((iQpelY<<16)|(x<<2)) );
pFeatureValuePointerList[uiFeature] += 2; pFeatureValuePointerList[uiFeature] += 2;
@@ -703,8 +704,7 @@ void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t
} }
void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef, void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
{
uint16_t* pFeatureOfBlock = pScreenBlockFeatureStorage->pFeatureOfBlockPointer; uint16_t* pFeatureOfBlock = pScreenBlockFeatureStorage->pFeatureOfBlockPointer;
uint32_t* pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue; uint32_t* pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue;
uint16_t** pLocationOfFeature = pScreenBlockFeatureStorage->pLocationOfFeature; uint16_t** pLocationOfFeature = pScreenBlockFeatureStorage->pLocationOfFeature;
@@ -867,6 +867,41 @@ void MotionEstimateFeatureFullSearch( SFeatureSearchIn &sFeatureSearchIn,
} }
} }
//switch related
// Returns the sum of uiSliceFMECostDown over the slices of pCurLayer.
// The number of slices visited is GetCurrentSliceNum( pCurLayer->pSliceEncCtx );
// when that count is below 1 no slice is read and the result is 0.
static uint32_t CountFMECostDown( const SDqLayer* pCurLayer ) {
  const int32_t kiSliceCount = GetCurrentSliceNum( pCurLayer->pSliceEncCtx );
  uint32_t uiTotalCostDown = 0;
  for ( int32_t iIdx = 0; iIdx < kiSliceCount; ++iIdx ) {
    uiTotalCostDown += pCurLayer->sLayerInfo.pSliceInLayer[iIdx].uiSliceFMECostDown;
  }
  return uiTotalCostDown;
}
#define FMESWITCH_MBAVERCOSTSAVING_THRESHOLD (2) //empirically set.
#define FMESWITCH_GOODFRAMECOUNT_MAX (5) //empirically set.
// Moves uiFMEGoodFrameCount one step up or down and keeps it inside
// [0, FMESWITCH_GOODFRAMECOUNT_MAX].
//  - iAvMBNormalizedRDcostDown: compared against FMESWITCH_MBAVERCOSTSAVING_THRESHOLD;
//    strictly greater counts as a good frame.
//  - uiFMEGoodFrameCount: updated in place (+1 for a good frame, -1 otherwise, saturating).
// The thresholds are empirical and the strategy may change.
static void UpdateFMEGoodFrameCount(const uint32_t iAvMBNormalizedRDcostDown, uint8_t& uiFMEGoodFrameCount) {
  const bool kbGoodFrame = ( iAvMBNormalizedRDcostDown > FMESWITCH_MBAVERCOSTSAVING_THRESHOLD );
  if ( kbGoodFrame ) {
    // good frame: step up unless already at the ceiling
    if ( uiFMEGoodFrameCount < FMESWITCH_GOODFRAMECOUNT_MAX ) {
      ++ uiFMEGoodFrameCount;
    }
    return;
  }
  // not a good frame: step down unless already at zero
  if ( uiFMEGoodFrameCount > 0 ) {
    -- uiFMEGoodFrameCount;
  }
}
// Updates the layer's FME good-frame counter from the FME cost-down of its slices.
// The summed slice cost-down (CountFMECostDown) is averaged over the number of
// macroblocks (iMbWidth * iMbHeight) and passed to UpdateFMEGoodFrameCount together
// with pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount, which is modified in place.
// If the macroblock count is not positive the counter is left untouched.
void UpdateFMESwitch(SDqLayer* pCurLayer) {
  // assumes iMbWidth/iMbHeight are signed integers whose product fits in int32_t -- TODO confirm
  const int32_t kiMbCount = pCurLayer->iMbWidth * pCurLayer->iMbHeight;
  if ( kiMbCount <= 0 ) {
    // Nothing to average over: dividing by zero is undefined, and a negative
    // count would silently turn into a huge unsigned divisor.
    return;
  }
  const uint32_t uiFMECost = CountFMECostDown( pCurLayer );
  const uint32_t uiAvMBNormalizedRDcostDown = uiFMECost / (uint32_t) kiMbCount;
  UpdateFMEGoodFrameCount( uiAvMBNormalizedRDcostDown, pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount );
}
// No-op with the same signature as UpdateFMESwitch, so it can be stored in
// pfUpdateFMESwitch when no update should take place. Does nothing with pCurLayer.
void UpdateFMESwitchNull(SDqLayer* pCurLayer) {
  (void) pCurLayer; // intentionally unused
}
///////////////////////// /////////////////////////
// Search function options // Search function options
///////////////////////// /////////////////////////