Merge pull request #621 from sijchen/fme_merge42

[Encoder ME] add Preprocess functions for FME
This commit is contained in:
ruil2 2014-04-04 09:48:44 +08:00
commit 646d943200
4 changed files with 346 additions and 185 deletions

View File

@ -30,7 +30,7 @@
*
*/
//picture.h - reconstruction picture/ reference picture/ residual picture are declared here
//picture.h - reconstruction picture/ reference picture/ residual picture are declared here
#ifndef WELS_PICTURE_H__
#define WELS_PICTURE_H__
@ -39,84 +39,89 @@
#include "wels_common_basis.h"
namespace WelsSVCEnc {
#define LIST_SIZE 0x10000 //(256*256)
#define LIST_SIZE 0x10000 //(256*256)
typedef struct TagScreenBlockFeatureStorage
{
uint32_t* pTimesOfFeatureValue; // times of every value in Feature
uint16_t** pLocationOfFeature; // uint16_t *pLocationOfFeature[LIST_SIZE], pLocationOfFeature[i] saves all the location(x,y) whose Feature = i;
uint16_t* pLocationPointer; // buffer of position array
int32_t iActualListSize; // actual list size
//Input
uint16_t* pFeatureOfBlockPointer; // Pointer to pFeatureOfBlock
int32_t iIs16x16; //Feature block size
uint8_t uiFeatureStrategyIndex;// index of hash strategy
//Modify
uint32_t* pTimesOfFeatureValue; // times of every value in Feature
uint16_t** pLocationOfFeature; // uint16_t *pLocationOfFeature[LIST_SIZE], pLocationOfFeature[i] saves all the location(x,y) whose Feature = i;
uint16_t* pLocationPointer; // buffer of position array
int32_t iActualListSize; // actual list size
uint32_t uiSadCostThreshold[BLOCK_SIZE_ALL];
bool bRefBlockFeatureCalculated; // flag of whether pre-process is done
bool bRefBlockFeatureCalculated; // flag of whether pre-process is done
} SScreenBlockFeatureStorage; //should be stored with RefPic, one for each frame
typedef struct TagFeatureSearchPreparation{
SScreenBlockFeatureStorage* pRefBlockFeature;//point the the ref frame storage
SScreenBlockFeatureStorage* pRefBlockFeature;//point the the ref frame storage
uint16_t* pFeatureOfBlock; // Feature of every block (8x8), begin with the point
uint8_t uiFeatureStrategyIndex;// index of hash strategy
uint16_t* pFeatureOfBlock; // Feature of every block (8x8), begin with the point
uint8_t uiFeatureStrategyIndex;// index of hash strategy
/* for FME frame-level switch */
bool bFMESwitchFlag;
uint8_t uiFMEGoodFrameCount;
int32_t iHighFreMbCount;
/* for FME frame-level switch */
bool bFMESwitchFlag;
uint8_t uiFMEGoodFrameCount;
int32_t iHighFreMbCount;
}SFeatureSearchPreparation;//maintain only one
/*
* Reconstructed Picture definition
* It is used to express reference picture, also consequent reconstruction picture for output
* Reconstructed Picture definition
* It is used to express reference picture, also consequent reconstruction picture for output
*/
typedef struct TagPicture {
/************************************payload pData*********************************/
uint8_t* pBuffer; // pointer to the first allocated byte, basical offset of pBuffer, dimension:
uint8_t* pData[3]; // pointer to picture planes respectively
int32_t iLineSize[3]; // iLineSize of picture planes respectively
uint8_t* pBuffer; // pointer to the first allocated byte, basical offset of pBuffer, dimension:
uint8_t* pData[3]; // pointer to picture planes respectively
int32_t iLineSize[3]; // iLineSize of picture planes respectively
// picture information
/*******************************from other standard syntax****************************/
/*from pSps*/
int32_t iWidthInPixel; // picture width in pixel
int32_t iHeightInPixel;// picture height in pixel
int32_t iPictureType; // got from sSliceHeader(): eSliceType
int32_t iFramePoc; // frame POC
int32_t iWidthInPixel; // picture width in pixel
int32_t iHeightInPixel;// picture height in pixel
int32_t iPictureType; // got from sSliceHeader(): eSliceType
int32_t iFramePoc; // frame POC
float fFrameRate; // MOVE
int32_t iFrameNum; // frame number //for pRef pic management
float fFrameRate; // MOVE
int32_t iFrameNum; // frame number //for pRef pic management
uint32_t* uiRefMbType; // for iMbWidth*iMbHeight
uint8_t* pRefMbQp; // for iMbWidth*iMbHeight
uint32_t* uiRefMbType; // for iMbWidth*iMbHeight
uint8_t* pRefMbQp; // for iMbWidth*iMbHeight
int32_t* pMbSkipSad; //for iMbWidth*iMbHeight
SMVUnitXY* sMvList;
SMVUnitXY* sMvList;
/*******************************sef_definition for misc use****************************/
int32_t iMarkFrameNum;
int32_t iLongTermPicNum;
int32_t iMarkFrameNum;
int32_t iLongTermPicNum;
bool bUsedAsRef; //for pRef pic management
bool bIsLongRef; // long term reference frame flag //for pRef pic management
bool bUsedAsRef; //for pRef pic management
bool bIsLongRef; // long term reference frame flag //for pRef pic management
bool bIsSceneLTR; //long term reference & large scene change
uint8_t uiRecieveConfirmed;
uint8_t uiTemporalId;
uint8_t uiSpatialId;
uint8_t uiRecieveConfirmed;
uint8_t uiTemporalId;
uint8_t uiSpatialId;
int32_t iFrameAverageQp;
} SPicture;
/*
* Residual Picture
* Residual Picture
*/
//typedef struct Rs_Picture_s{
// int16_t *pBuffer[4]; // base pBuffer
// int16_t *pData[4]; // pData pBuffer
// int32_t real_linesize[4];// actual iLineSize of picture planes respectively
// int32_t used_linesize[4];// iLineSize of picture planes respectively used currently
// int32_t planes; // planes of YUV
// int16_t *pBuffer[4]; // base pBuffer
// int16_t *pData[4]; // pData pBuffer
// int32_t real_linesize[4];// actual iLineSize of picture planes respectively
// int32_t used_linesize[4];// iLineSize of picture planes respectively used currently
// int32_t planes; // planes of YUV
//}Rs_Picture_t;
} // end of namespace WelsSVCEnc {
} // end of namespace WelsSVCEnc {
#endif//WELS_PICTURE_H__

View File

@ -29,11 +29,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file svc motion estimate.h
* \file svc motion estimate.h
*
* \brief Interfaces introduced in svc mb motion estimation
* \brief Interfaces introduced in svc mb motion estimation
*
* \date 08/11/2009 Created
* \date 08/11/2009 Created
*
*************************************************************************************
*/
@ -46,52 +46,52 @@
namespace WelsSVCEnc {
#define CAMERA_STARTMV_RANGE (64)
#define ITERATIVE_TIMES (16)
#define ITERATIVE_TIMES (16)
#define CAMERA_MV_RANGE (CAMERA_STARTMV_RANGE+ITERATIVE_TIMES)
#define CAMERA_MVD_RANGE ((CAMERA_MV_RANGE+1)<<1) //mvd=mv_range*2;
#define BASE_MV_MB_NMB ((2*CAMERA_MV_RANGE/MB_WIDTH_LUMA)-1)
#define BASE_MV_MB_NMB ((2*CAMERA_MV_RANGE/MB_WIDTH_LUMA)-1)
#define CAMERA_HIGHLAYER_MVD_RANGE (243)//mvd range;
#define EXPANDED_MV_RANGE (504) //=512-8 rather than 511 to sacrifice same edge point but save complexity in assemblys
#define EXPANDED_MVD_RANGE ((504+1)<<1)
enum
{
ME_DIA = 0x01, // LITTLE DIAMOND= 0x01
ME_CROSS = 0x02, // CROSS= 0x02
ME_FME = 0x04, // FME = 0x04
ME_FULL = 0x10, // FULL
ME_DIA = 0x01, // LITTLE DIAMOND= 0x01
ME_CROSS = 0x02, // CROSS= 0x02
ME_FME = 0x04, // FME = 0x04
ME_FULL = 0x10, // FULL
// derived ME methods combination
ME_DIA_CROSS = (ME_DIA|ME_CROSS), // DIA+CROSS
ME_DIA_CROSS_FME = (ME_DIA_CROSS|ME_FME), // DIA+CROSS+FME
ME_DIA_CROSS = (ME_DIA|ME_CROSS), // DIA+CROSS
ME_DIA_CROSS_FME = (ME_DIA_CROSS|ME_FME), // DIA+CROSS+FME
};
union SadPredISatdUnit {
uint32_t uiSadPred;
uint32_t uiSatd; //reuse the sad_pred as a temp satd pData
uint32_t uiSadPred;
uint32_t uiSatd; //reuse the sad_pred as a temp satd pData
};
typedef struct TagWelsME {
/* input */
uint16_t* pMvdCost;
union SadPredISatdUnit uSadPredISatd; //reuse the sad_pred as a temp pData
uint32_t uiSadCost; //used by ME and RC //max SAD should be max_delta*size+lambda*mvdsize = 255*256+91*33*2 = 65280 + 6006 = 71286 > (2^16)-1 = 65535
uint32_t uiSatdCost; /* satd + lm * nbits */
uint32_t uiSadCostThreshold;
int32_t iCurMeBlockPixX;
int32_t iCurMeBlockPixY;
uint8_t uiBlockSize; /* BLOCK_WxH */
uint8_t uiReserved;
uint16_t* pMvdCost;
union SadPredISatdUnit uSadPredISatd; //reuse the sad_pred as a temp pData
uint32_t uiSadCost; //used by ME and RC //max SAD should be max_delta*size+lambda*mvdsize = 255*256+91*33*2 = 65280 + 6006 = 71286 > (2^16)-1 = 65535
uint32_t uiSatdCost; /* satd + lm * nbits */
uint32_t uiSadCostThreshold;
int32_t iCurMeBlockPixX;
int32_t iCurMeBlockPixY;
uint8_t uiBlockSize; /* BLOCK_WxH */
uint8_t uiReserved;
uint8_t* pEncMb;
uint8_t* pRefMb;
uint8_t* pColoRefMb;
uint8_t* pEncMb;
uint8_t* pRefMb;
uint8_t* pColoRefMb;
SMVUnitXY sMvp;
SMVUnitXY sMvBase;
SMVUnitXY sDirectionalMv;
SMVUnitXY sMvp;
SMVUnitXY sMvBase;
SMVUnitXY sDirectionalMv;
/* output */
SMVUnitXY sMv;
SMVUnitXY sMv;
} SWelsME;
typedef struct TagFeatureSearchIn{
@ -134,50 +134,50 @@ typedef struct TagFeatureSearchOut{
void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent );
/*!
* \brief BL mb motion estimate search
* \brief BL mb motion estimate search
*
* \param enc Wels encoder context
* \param m Wels me information
* \param enc Wels encoder context
* \param m Wels me information
*
* \return NONE
* \return NONE
*/
void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, void* pLplayer, void* pLpme, void* pLpslice);
/*!
* \brief BL mb motion estimate initial point testing
* \brief BL mb motion estimate initial point testing
*
* \param enc Wels encoder context
* \param m Wels me information
* \param mv_range search range in motion estimate
* \param point the best match point in motion estimation
* \param enc Wels encoder context
* \param m Wels me information
* \param mv_range search range in motion estimate
* \param point the best match point in motion estimation
*
* \return NONE
* \return NONE
*/
/*!
* \brief EL mb motion estimate initial point testing
* \brief EL mb motion estimate initial point testing
*
* \param pix_func SSampleDealingFunc
* \param m Wels me information
* \param mv_range search range in motion estimate
* \param point the best match point in motion estimation
* \param pix_func SSampleDealingFunc
* \param m Wels me information
* \param mv_range search range in motion estimate
* \param point the best match point in motion estimation
*
* \return NONE
* \return NONE
*/
bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
const int32_t kiStrideEnc, const int32_t kiStrideRef);
/*!
* \brief mb iterative motion estimate search
* \brief mb iterative motion estimate search
*
* \param enc Wels encoder context
* \param m Wels me information
* \param point the best match point in motion estimation
* \param enc Wels encoder context
* \param m Wels me information
* \param point the best match point in motion estimation
*
* \return NONE
* \return NONE
*/
void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, void* pLpme, void* pLpslice, const int32_t kiEncStride, const int32_t kiRefStride);
@ -193,18 +193,30 @@ bool CheckDirectionalMvFalse(PSampleSadSatdCostFunc pSad, void * vpMe,
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
int32_t& iBestSadCost);
void LineFullSearch_c( void *pFunc, void *vpMe,
// Cross Search Basics
void LineFullSearch_c( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch );
void VerticalFullSearchUsingSSE41( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch );
void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SDqLayer* pCurLayer, SWelsME * pMe, const SSlice* pSlice);
// Feature Search Basics
#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
#define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
void SumOf8x8BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
//inline functions
inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY,
const int32_t kiMaxMvRange,
SMVUnitXY* pMvMin, SMVUnitXY* pMvMax)

View File

@ -141,11 +141,14 @@ typedef void (*PCalculateSatdFunc) ( PSampleSadSatdCostFunc pSatd, void * vpMe,
typedef bool (*PCheckDirectionalMv) (PSampleSadSatdCostFunc pSad, void * vpMe,
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
int32_t& iBestSadCost);
typedef void (*PLineFullSearchFunc) ( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
typedef void (*PLineFullSearchFunc) ( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch );
typedef void (*PCalculateBlockFeatureOfFrame)(uint8_t *pRef, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
typedef int32_t (*PCalculateSingleBlockFeature)(uint8_t *pRef, const int32_t kiRefStride);
#define MAX_BLOCK_TYPE 5 // prev 7
typedef struct TagSampleDealingFunc {
@ -175,15 +178,15 @@ typedef void (*PMarkPicFunc)(void* pCtx);
typedef bool (*PUpdateRefListFunc) (void* pCtx);
struct TagWelsFuncPointerList {
PExpandPictureFunc pfExpandLumaPicture;
PExpandPictureFunc pfExpandLumaPicture;
PExpandPictureFunc
pfExpandChromaPicture[2];// 0: for chroma unalignment && width_uv >= 16; 1: for chroma alignment && width_uv >= 16;
PFillInterNeighborCacheFunc pfFillInterNeighborCache;
PGetVarianceFromIntraVaaFunc pfGetVarianceFromIntraVaa;
PGetMbSignFromInterVaaFunc pfGetMbSignFromInterVaa;
PUpdateMbMvFunc pfUpdateMbMv;
PGetVarianceFromIntraVaaFunc pfGetVarianceFromIntraVaa;
PGetMbSignFromInterVaaFunc pfGetMbSignFromInterVaa;
PUpdateMbMvFunc pfUpdateMbMv;
PInterMdFirstIntraModeFunc pfFirstIntraMode; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
PIntraFineMdFunc
pfIntraFineMd; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
@ -193,11 +196,11 @@ struct TagWelsFuncPointerList {
PInterMdBackgroundDecisionFunc pfInterMdBackgroundDecision;
PInterMdBackgroundInfoUpdateFunc pfInterMdBackgroundInfoUpdate;
SMcFunc sMcFuncs;
SMcFunc sMcFuncs;
SSampleDealingFunc sSampleDealingFuncs;
PGetIntraPredFunc pfGetLumaI16x16Pred[I16_PRED_DC_A];
PGetIntraPredFunc pfGetLumaI4x4Pred[I4_PRED_A];
PGetIntraPredFunc pfGetChromaPred[C_PRED_A];
PGetIntraPredFunc pfGetLumaI16x16Pred[I16_PRED_DC_A];
PGetIntraPredFunc pfGetLumaI4x4Pred[I4_PRED_A];
PGetIntraPredFunc pfGetChromaPred[C_PRED_A];
PMotionSearchFunc
pfMotionSearch[BLOCK_STATIC_IDC_ALL]; //svc_encode_slice.c svc_mode_decision.c svc_enhance_layer_md.c svc_base_layer_md.c
@ -205,61 +208,63 @@ struct TagWelsFuncPointerList {
PCalculateSatdFunc pfCalculateSatd;
PCheckDirectionalMv pfCheckDirectionalMv;
PLineFullSearchFunc pfLineFullSearch;
PCalculateBlockFeatureOfFrame pfCalculateBlockFeatureOfFrame[2];//0 - for 8x8, 1 for 16x16
PCalculateSingleBlockFeature pfCalculateSingleBlockFeature[2];//0 - for 8x8, 1 for 16x16
PCopyFunc pfCopy16x16Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
PCopyFunc pfCopy16x16NotAligned; //md.c
PCopyFunc pfCopy8x8Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c md.c
PCopyFunc pfCopy16x8NotAligned; //for MeRefineFracPixel 16x8 based
PCopyFunc pfCopy8x16Aligned; //for MeRefineFracPixel 8x16 based
PCopyFunc pfCopy16x16Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
PCopyFunc pfCopy16x16NotAligned; //md.c
PCopyFunc pfCopy8x8Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c md.c
PCopyFunc pfCopy16x8NotAligned; //for MeRefineFracPixel 16x8 based
PCopyFunc pfCopy8x16Aligned; //for MeRefineFracPixel 8x16 based
//svc_encode_mb.c encode_mb_aux.c
PDctFunc pfDctT4;
PDctFunc pfDctFourT4;
PDctFunc pfDctT4;
PDctFunc pfDctFourT4;
PCalculateSingleCtrFunc pfCalculateSingleCtr4x4;
PScanFunc pfScan4x4; //DC/AC
PScanFunc pfScan4x4Ac;
PCalculateSingleCtrFunc pfCalculateSingleCtr4x4;
PScanFunc pfScan4x4; //DC/AC
PScanFunc pfScan4x4Ac;
PQuantizationFunc pfQuantization4x4;
PQuantizationFunc pfQuantizationFour4x4;
PQuantizationDcFunc pfQuantizationDc4x4;
PQuantizationMaxFunc pfQuantizationFour4x4Max;
PQuantizationHadamardFunc pfQuantizationHadamard2x2;
PQuantizationSkipFunc pfQuantizationHadamard2x2Skip;
PQuantizationFunc pfQuantization4x4;
PQuantizationFunc pfQuantizationFour4x4;
PQuantizationDcFunc pfQuantizationDc4x4;
PQuantizationMaxFunc pfQuantizationFour4x4Max;
PQuantizationHadamardFunc pfQuantizationHadamard2x2;
PQuantizationSkipFunc pfQuantizationHadamard2x2Skip;
PTransformHadamard4x4Func pfTransformHadamard4x4Dc;
PTransformHadamard4x4Func pfTransformHadamard4x4Dc;
PGetNoneZeroCountFunc pfGetNoneZeroCount;
PGetNoneZeroCountFunc pfGetNoneZeroCount;
PDeQuantizationFunc pfDequantization4x4;
PDeQuantizationFunc pfDequantizationFour4x4;
PDeQuantizationHadamardFunc pfDequantizationIHadamard4x4;
PIDctFunc pfIDctFourT4;
PIDctFunc pfIDctT4;
PIDctFunc pfIDctI16x16Dc;
PDeQuantizationFunc pfDequantization4x4;
PDeQuantizationFunc pfDequantizationFour4x4;
PDeQuantizationHadamardFunc pfDequantizationIHadamard4x4;
PIDctFunc pfIDctFourT4;
PIDctFunc pfIDctT4;
PIDctFunc pfIDctI16x16Dc;
// OPTI: if MT under diff uiSliceMode, need change here
//PDynamicSlicingStepBackFunc dynslc_funcpointer_stepback;//svc_encode_slice.c
//DYNSLC_LNGTH_CRTL dynslc_funcpointer_slcsize_ctrl;
//PDynamicSlicingStepBackFunc dynslc_funcpointer_stepback;//svc_encode_slice.c
//DYNSLC_LNGTH_CRTL dynslc_funcpointer_slcsize_ctrl;
/* For Deblocking */
DeblockingFunc pfDeblocking;
PSetNoneZeroCountZeroFunc pfSetNZCZero;
SWelsRcFunc pfRc;
SWelsRcFunc pfRc;
PAccumulateSadFunc pfAccumulateSadForRc;
PSetMemoryZero pfSetMemZeroSize8; // for size is times to 8
PSetMemoryZero pfSetMemZeroSize64Aligned16; // for size is times of 64, and address is align to 16
PSetMemoryZero pfSetMemZeroSize64; // for size is times of 64, and don't know address is align to 16 or not
PSetMemoryZero pfSetMemZeroSize8; // for size is times to 8
PSetMemoryZero pfSetMemZeroSize64Aligned16; // for size is times of 64, and address is align to 16
PSetMemoryZero pfSetMemZeroSize64; // for size is times of 64, and don't know address is align to 16 or not
PBuildRefListFunc pBuildRefList;
PMarkPicFunc pMarkPic;
PUpdateRefListFunc pUpdateRefList;
};
} //end of namespace WelsSVCEnc {
} //end of namespace WelsSVCEnc {
#endif//WELS_ENCODER_FUNCTION_POINTERS_DEFINITION_H_

View File

@ -29,16 +29,17 @@
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file svc motion estimate.c
* \file svc motion estimate.c
*
* \brief Interfaces introduced in svc mb motion estimation
* \brief Interfaces introduced in svc mb motion estimation
*
* \date 08/11/2009 Created
* \date 08/11/2009 Created
*
*************************************************************************************
*/
#include "cpu_core.h"
#include "ls_defines.h"
#include "svc_motion_estimate.h"
namespace WelsSVCEnc {
@ -67,16 +68,23 @@ void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScre
pFuncList->pfLineFullSearch = LineFullSearch_c;
if ( uiCpuFlag & WELS_CPU_SSE41 ) {
}
//for feature search
pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_c;
//TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
}
}
/*!
* \brief BL mb motion estimate search
* \brief BL mb motion estimate search
*
* \param enc Wels encoder context
* \param pMe Wels me information
* \param enc Wels encoder context
* \param pMe Wels me information
*
* \return NONE
* \return NONE
*/
void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, void* pLplayer, void* pLpme, void* pLpslice) {
@ -96,37 +104,37 @@ void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, void* pLplayer, void
}
/*!
* \brief EL mb motion estimate initial point testing
* \brief EL mb motion estimate initial point testing
*
* \param pix_pFuncList SSampleDealingFunc
* \param pMe Wels me information
* \param mv_range search range in motion estimate
* \param point the best match point in motion estimation
* \param pix_pFuncList SSampleDealingFunc
* \param pMe Wels me information
* \param mv_range search range in motion estimate
* \param point the best match point in motion estimation
*
* \return NONE
* \return NONE
*/
bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, int32_t iStrideEnc,
int32_t iStrideRef) {
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
const uint16_t* kpMvdCost = pMe->pMvdCost;
uint8_t* const kpEncMb = pMe->pEncMb;
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
const uint16_t* kpMvdCost = pMe->pMvdCost;
uint8_t* const kpEncMb = pMe->pEncMb;
int16_t iMvc0, iMvc1;
int32_t iSadCost;
int32_t iBestSadCost;
uint8_t* pRefMb;
uint8_t* pFref2;
uint32_t i;
const uint32_t kuiMvcNum = pSlice->uiMvcNum;
const SMVUnitXY* kpMvcList = &pSlice->sMvc[0];
const SMVUnitXY ksMvStartMin = pSlice->sMvStartMin;
const SMVUnitXY ksMvStartMax = pSlice->sMvStartMax;
const SMVUnitXY ksMvp = pMe->sMvp;
const uint32_t kuiMvcNum = pSlice->uiMvcNum;
const SMVUnitXY* kpMvcList = &pSlice->sMvc[0];
const SMVUnitXY ksMvStartMin = pSlice->sMvStartMin;
const SMVUnitXY ksMvStartMax = pSlice->sMvStartMax;
const SMVUnitXY ksMvp = pMe->sMvp;
SMVUnitXY sMv;
// Step 1: Initial point prediction
// init with sMvp
sMv.iMvX = WELS_CLIP3 ((2 + ksMvp.iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX);
sMv.iMvY = WELS_CLIP3 ((2 + ksMvp.iMvY) >> 2, ksMvStartMin.iMvY, ksMvStartMax.iMvY);
sMv.iMvX = WELS_CLIP3 ((2 + ksMvp.iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX);
sMv.iMvY = WELS_CLIP3 ((2 + ksMvp.iMvY) >> 2, ksMvStartMin.iMvY, ksMvStartMax.iMvY);
pRefMb = &pMe->pRefMb[sMv.iMvY * iStrideRef + sMv.iMvX];
@ -171,7 +179,7 @@ bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
void CalculateSatdCost( PSampleSadSatdCostFunc pSatd, void * vpMe,
const int32_t kiEncStride, const int32_t kiRefStride ) {
SWelsME* pMe = static_cast<SWelsME *>(vpMe);
SWelsME* pMe = static_cast<SWelsME *>(vpMe);
pMe->uSadPredISatd.uiSatd = pSatd(pMe->pEncMb, kiEncStride, pMe->pRefMb, kiRefStride);
pMe->uiSatdCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, pMe->sMv.iMvX - pMe->sMvp.iMvX,
pMe->sMv.iMvY - pMe->sMvp.iMvY);
@ -266,7 +274,7 @@ void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, void* pLpme, void* pLpslice
bool CheckDirectionalMv(PSampleSadSatdCostFunc pSad, void * vpMe,
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
int32_t& iBestSadCost) {
SWelsME* pMe = static_cast<SWelsME *>(vpMe);
SWelsME* pMe = static_cast<SWelsME *>(vpMe);
const int16_t kiMvX = pMe->sDirectionalMv.iMvX;
const int16_t kiMvY = pMe->sDirectionalMv.iMvY;
@ -295,34 +303,34 @@ bool CheckDirectionalMvFalse(PSampleSadSatdCostFunc pSad, void * vpMe,
// Cross Search Basics
/////////////////////////
void VerticalFullSearchUsingSSE41( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch ) {
SWelsFuncPtrList *pFuncList = static_cast<SWelsFuncPtrList *>(pFunc);
SWelsME *pMe = static_cast<SWelsME *>(vpMe);
SWelsME *pMe = static_cast<SWelsME *>(vpMe);
}
void LineFullSearch_c( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
void LineFullSearch_c( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch ) {
SWelsFuncPtrList *pFuncList = static_cast<SWelsFuncPtrList *>(pFunc);
SWelsME *pMe = static_cast<SWelsME *>(vpMe);
SWelsME *pMe = static_cast<SWelsME *>(vpMe);
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
const int32_t kiCurMeBlockPix = bVerticalSearch?pMe->iCurMeBlockPixY:pMe->iCurMeBlockPixX;
const int32_t kiCurMeBlockPix = bVerticalSearch?pMe->iCurMeBlockPixY:pMe->iCurMeBlockPixX;
const int32_t kiStride = bVerticalSearch?kiRefStride:1;
uint8_t* pRef = &pMe->pColoRefMb[(kiMinPos - kiCurMeBlockPix)*kiStride];
uint8_t* pRef = &pMe->pColoRefMb[(kiMinPos - kiCurMeBlockPix)*kiStride];
uint16_t* pMvdCost = &(pMvdTable[kiMinPos<<2]);
uint32_t uiBestCost = 0xFFFFFFFF;
int32_t iBestPos = 0;
uint32_t uiBestCost = 0xFFFFFFFF;
int32_t iBestPos = 0;
for ( int32_t iTargetPos = kiMinPos; iTargetPos < kiMaxPos; ++ iTargetPos ) {
uint8_t* const kpEncMb = pMe->pEncMb;
uint8_t* const kpEncMb = pMe->pEncMb;
uint32_t uiSadCost = pSad( kpEncMb, kiEncStride, pRef, kiRefStride ) + (kiFixedMvd + *pMvdCost);
if (uiSadCost < uiBestCost) {
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
}
pRef += kiStride;
pMvdCost+=4;
@ -400,8 +408,8 @@ int32_t ReleaseFeatureSearchPreparation( CMemoryAlign *pMa, uint16_t*& pFeatureO
int32_t RequestScreenBlockFeatureStorage( CMemoryAlign *pMa, const int32_t kiFeatureStrategyIndex,
const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t kiMe16x16, const int32_t kiMe8x8,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
if (((kiMe8x8&ME_FME)==ME_FME) && ((kiMe16x16&ME_FME)==ME_FME)) {
return ENC_RETURN_UNSUPPORTED_PARA;
@ -411,7 +419,7 @@ int32_t RequestScreenBlockFeatureStorage( CMemoryAlign *pMa, const int32_t kiFea
const bool bIsBlock8x8 = ((kiMe8x8 & ME_FME)==ME_FME);
const int32_t kiMarginSize = bIsBlock8x8?8:16;
const int32_t kiFrameSize = (kiFrameWidth-kiMarginSize) * (kiFrameHeight-kiMarginSize);
const int32_t kiListSize = (0==kiFeatureStrategyIndex)?(bIsBlock8x8 ? LIST_SIZE_SUM_8x8 : LIST_SIZE_SUM_16x16):256;
const int32_t kiListSize = (0==kiFeatureStrategyIndex)?(bIsBlock8x8 ? LIST_SIZE_SUM_8x8 : LIST_SIZE_SUM_16x16):256;
pScreenBlockFeatureStorage->pTimesOfFeatureValue = (uint32_t*)pMa->WelsMalloc(kiListSize*sizeof(uint32_t),"pScreenBlockFeatureStorage->pTimesOfFeatureValue");
WELS_VERIFY_RETURN_IF(ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pTimesOfFeatureValue)
@ -422,7 +430,7 @@ int32_t RequestScreenBlockFeatureStorage( CMemoryAlign *pMa, const int32_t kiFea
pScreenBlockFeatureStorage->pLocationPointer = (uint16_t*)pMa->WelsMalloc(2*kiFrameSize*sizeof(uint16_t), "pScreenBlockFeatureStorage->pLocationPointer");
WELS_VERIFY_RETURN_IF(ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationPointer)
pScreenBlockFeatureStorage->iActualListSize = kiListSize;
pScreenBlockFeatureStorage->iActualListSize = kiListSize;
return ENC_RETURN_SUCCESS;
}
int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage ) {
@ -440,13 +448,144 @@ int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeature
}
return ENC_RETURN_UNEXPECTED;
}
//preprocess related
/*!
 * \brief   sum of the 64 samples of one 8x8 block
 *
 * \param   pRef         pointer to the top-left sample of the block
 * \param   kiRefStride  distance (in samples) between two consecutive rows of pRef
 *
 * \return  sum of the 8x8 samples (at most 64*255)
 */
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride)
{
  int32_t iTotal = 0;
  const uint8_t* pRow = pRef;
  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    // accumulate the 8 samples of the current row
    for (int32_t iCol = 0; iCol < 8; ++iCol) {
      iTotal += pRow[iCol];
    }
    pRow += kiRefStride;
  }
  return iTotal;
}
/*!
 * \brief   sum of the 256 samples of one 16x16 block
 *
 * \param   pRef         pointer to the top-left sample of the block
 * \param   kiRefStride  distance (in samples) between two consecutive rows of pRef
 *
 * \return  sum of the 16x16 samples (at most 256*255)
 */
int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride)
{
  int32_t iTotal = 0;
  const uint8_t* pRow = pRef;
  for (int32_t iRow = 0; iRow < 16; ++iRow) {
    // accumulate the 16 samples of the current row
    for (int32_t iCol = 0; iCol < 16; ++iCol) {
      iTotal += pRow[iCol];
    }
    pRow += kiRefStride;
  }
  return iTotal;
}
/*!
 * \brief   8x8 sum feature for every block position of a picture
 *
 * For each (x, y) with 0 <= x < kiWidth and 0 <= y < kiHeight the sum of the 8x8 block
 * whose top-left sample is pRefPicture[y*kiRefStride + x] is written to
 * pFeatureOfBlock[y*kiWidth + x], and the counter pTimesOfFeatureValue[sum] is incremented.
 * The counters are only incremented here, never cleared.
 * Samples up to 7 columns/rows beyond kiWidth/kiHeight are read.
 *
 * \param   pRefPicture           top-left sample of the picture plane
 * \param   kiWidth               number of block positions per row
 * \param   kiHeight              number of rows of block positions
 * \param   kiRefStride           stride of pRefPicture
 * \param   pFeatureOfBlock       output, kiWidth*kiHeight feature values
 * \param   pTimesOfFeatureValue  in/out, occurrence counter indexed by feature value
 *
 * \return  NONE
 */
void SumOf8x8BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
                            uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[])
{
  uint8_t*  pRefRow     = pRefPicture;
  uint16_t* pFeatureRow = pFeatureOfBlock;

  for (int32_t iRow = 0; iRow < kiHeight; ++iRow) {
    for (int32_t iCol = 0; iCol < kiWidth; ++iCol) {
      const int32_t kiFeature = SumOf8x8SingleBlock_c(&pRefRow[iCol], kiRefStride);
      pFeatureRow[iCol] = kiFeature;
      pTimesOfFeatureValue[kiFeature]++;
    }
    pRefRow     += kiRefStride;
    pFeatureRow += kiWidth;
  }
}
/*!
 * \brief   16x16 sum feature for every block position of a picture
 *
 * For each (x, y) with 0 <= x < kiWidth and 0 <= y < kiHeight the sum of the 16x16 block
 * whose top-left sample is pRefPicture[y*kiRefStride + x] is written to
 * pFeatureOfBlock[y*kiWidth + x], and the counter pTimesOfFeatureValue[sum] is incremented.
 * The counters are only incremented here, never cleared.
 * Samples up to 15 columns/rows beyond kiWidth/kiHeight are read.
 *
 * \param   pRefPicture           top-left sample of the picture plane
 * \param   kiWidth               number of block positions per row
 * \param   kiHeight              number of rows of block positions
 * \param   kiRefStride           stride of pRefPicture
 * \param   pFeatureOfBlock       output, kiWidth*kiHeight feature values
 * \param   pTimesOfFeatureValue  in/out, occurrence counter indexed by feature value
 *
 * \return  NONE
 */
void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
                              uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[])
{//TODO: this is the same as SumOf8x8BlockOfFrame_c except for the single block func that is called, refactor-able?
  uint8_t*  pRefRow     = pRefPicture;
  uint16_t* pFeatureRow = pFeatureOfBlock;

  for (int32_t iRow = 0; iRow < kiHeight; ++iRow) {
    for (int32_t iCol = 0; iCol < kiWidth; ++iCol) {
      const int32_t kiFeature = SumOf16x16SingleBlock_c(&pRefRow[iCol], kiRefStride);
      pFeatureRow[iCol] = kiFeature;
      pTimesOfFeatureValue[kiFeature]++;
    }
    pRefRow     += kiRefStride;
    pFeatureRow += kiWidth;
  }
}
/*!
 * \brief   partition the location buffer into one slice per feature value
 *
 * Walks the feature values 0..kiListSize-1 in order. Entry i of both pointer lists is set to
 * the current position inside pBuf, then the position advances by 2*pTimesOfFeatureValue[i]
 * uint16_t elements (two elements per counted occurrence).
 *
 * \param   pTimesOfFeatureValue      occurrence count of every feature value
 * \param   pBuf                      start of the location buffer
 * \param   kiListSize                number of feature values to process
 * \param   pLocationOfFeature        output, start of the slice of every feature value
 * \param   pFeatureValuePointerList  output, same start positions as pLocationOfFeature
 *
 * \return  NONE
 */
void InitializeHashforFeature_c( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
                                 uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList )
{
  uint16_t* pNextSlice = pBuf;
  int32_t iFeature = 0;

  while (iFeature < kiListSize) {
    pFeatureValuePointerList[iFeature] = pNextSlice;
    pLocationOfFeature[iFeature]       = pNextSlice;
    // every occurrence takes two uint16_t elements
    pNextSlice += (pTimesOfFeatureValue[iFeature] << 1);
    ++iFeature;
  }
}
/*!
 * \brief   store the location of every block position into the slice of its feature value
 *
 * For each (x, y) the feature value f = pFeatureOfBlock[y*kiWidth + x] selects the write
 * position pFeatureValuePointerList[f]. One 32-bit word ((4*y)<<16)|(4*x) is stored there
 * through ST32, and that write position is then advanced by two uint16_t elements.
 * NOTE(review): which of the two uint16_t elements ends up holding x and which y depends on
 * how ST32 writes the word -- confirm against the ST32 definition.
 *
 * \param   pFeatureOfBlock           kiWidth*kiHeight feature values
 * \param   kiWidth                   number of block positions per row
 * \param   kiHeight                  number of rows of block positions
 * \param   pFeatureValuePointerList  in/out, current write position of every feature value
 *
 * \return  NONE
 */
void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
                                       uint16_t** pFeatureValuePointerList )
{
  uint16_t* pFeatureRow = pFeatureOfBlock;

  for (int32_t iRow = 0; iRow < kiHeight; ++iRow) {
    // the row position (times 4) goes to the upper 16 bits of the stored word
    const int32_t kiPackedQpelY = ((iRow << 2) << 16);

    for (int32_t iCol = 0; iCol < kiWidth; ++iCol) {
      const uint16_t kuiFeature = pFeatureRow[iCol];
      uint16_t* pDst = pFeatureValuePointerList[kuiFeature];
      ST32( pDst, (kiPackedQpelY | (iCol << 2)) );
      pFeatureValuePointerList[kuiFeature] = pDst + 2;
    }
    pFeatureRow += kiWidth;
  }
}
/*!
 * \brief   build the block-feature tables of a reference picture
 *
 * Steps:
 *  1. clear the occurrence counters (iActualListSize entries);
 *  2. compute the feature of every block position of the luma plane (pRef->pData[0]) through
 *     pFunc->pfCalculateBlockFeatureOfFrame[], which also counts the occurrences;
 *  3. split pLocationPointer into one slice per feature value (InitializeHashforFeature_c);
 *  4. write the location of every block position into the slice of its feature value
 *     (FillQpelLocationByFeatureValue_c).
 * The processed area is the picture size minus the block size (16 or 8) in both directions.
 *
 * \param   pFunc                       function pointer list, pfCalculateBlockFeatureOfFrame[] is used
 * \param   pRef                        reference picture, pData[0]/iLineSize[0]/iWidthInPixel/iHeightInPixel are read
 * \param   pScreenBlockFeatureStorage  feature storage, supplies the buffers and iIs16x16/iActualListSize
 *
 * \return  NONE
 */
void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef,
                              SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
{
  uint16_t*  pFeatureOfBlock      = pScreenBlockFeatureStorage->pFeatureOfBlockPointer;
  uint32_t*  pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue;
  uint16_t** pLocationOfFeature   = pScreenBlockFeatureStorage->pLocationOfFeature;
  uint16_t*  pBuf                 = pScreenBlockFeatureStorage->pLocationPointer;

  uint8_t* pRefData         = pRef->pData[0];
  const int32_t kiRefStride = pRef->iLineSize[0];
  // iIs16x16 is used as a flag; normalise it to 0/1 so that it is always a valid index of the
  // 2-entry pfCalculateBlockFeatureOfFrame[] (0 - for 8x8, 1 for 16x16)
  const int32_t kiIs16x16        = (pScreenBlockFeatureStorage->iIs16x16 != 0) ? 1 : 0;
  const int32_t kiEdgeDiscard    = (kiIs16x16 ? 16 : 8);//this is to save complexity of padding on pRef
  const int32_t kiWidth          = pRef->iWidthInPixel - kiEdgeDiscard;
  const int32_t kiHeight         = pRef->iHeightInPixel - kiEdgeDiscard;
  const int32_t kiActualListSize = pScreenBlockFeatureStorage->iActualListSize;
  // running write position of every feature value, only the first kiActualListSize entries are used
  // NOTE(review): this is an array of about 65K pointers on the stack -- confirm the stack size of
  // the calling thread, or consider moving the array into SScreenBlockFeatureStorage
  uint16_t* pFeatureValuePointerList[WELS_MAX(LIST_SIZE_SUM_16x16,LIST_SIZE_MSE_16x16)] = {0};

  // the counters are only incremented by the frame function, so clear them first
  memset(pTimesOfFeatureValue, 0, sizeof(*pTimesOfFeatureValue)*kiActualListSize);
  (pFunc->pfCalculateBlockFeatureOfFrame[kiIs16x16])(pRefData, kiWidth, kiHeight, kiRefStride, pFeatureOfBlock, pTimesOfFeatureValue);

  //assign pLocationOfFeature pointer
  InitializeHashforFeature_c( pTimesOfFeatureValue, pBuf, kiActualListSize,
                              pLocationOfFeature, pFeatureValuePointerList );

  //assign each pixel's pLocationOfFeature
  FillQpelLocationByFeatureValue_c(pFeatureOfBlock, kiWidth, kiHeight, pFeatureValuePointerList);
}
/*!
 * \brief   FME pre-process of one reference picture
 *
 * Runs CalculateFeatureOfBlock() on pRef and then sets bRefBlockFeatureCalculated of the storage.
 *
 * \param   pFunc                       function pointer list, forwarded to CalculateFeatureOfBlock()
 * \param   pRef                        reference picture, forwarded to CalculateFeatureOfBlock()
 * \param   pScreenBlockFeatureStorage  feature storage that is filled and flagged
 *
 * \return  NONE
 */
void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
{
CalculateFeatureOfBlock(pFunc, pRef, pScreenBlockFeatureStorage );
// flag is set only after the tables have been built
pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = true;
}
//search related
void SetFeatureSearchIn( SWelsFuncPtrList *pFunc, const SWelsME& sMe,
const SSlice *pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
const int32_t kiEncStride, const int32_t kiRefStride,
SFeatureSearchIn* pFeatureSearchIn ) {
pFeatureSearchIn->pSad = pFunc->sSampleDealingFuncs.pfSampleSad[sMe.uiBlockSize];
//pFeatureSearchIn->iFeatureOfCurrent=
pFeatureSearchIn->iFeatureOfCurrent=pFunc->pfCalculateSingleBlockFeature[BLOCK_16x16==sMe.uiBlockSize](sMe.pEncMb, kiEncStride);
pFeatureSearchIn->pEnc = sMe.pEncMb;
pFeatureSearchIn->pColoRef = sMe.pColoRefMb;