Merge pull request #690 from sijchen/fme_merge65

[Encoder ME] Add calling of FME preprocess calculation

Approved by Xiaolin.
This commit is contained in:
volvet 2014-04-16 17:41:17 +08:00
commit 6cb48fc547
6 changed files with 189 additions and 91 deletions

View File

@ -56,18 +56,6 @@ typedef struct TagScreenBlockFeatureStorage
bool bRefBlockFeatureCalculated; // flag of whether pre-process is done
} SScreenBlockFeatureStorage; //should be stored with RefPic, one for each frame
// Working state for the FME search; an instance is referenced from SDqLayer
// through its pFeatureSearchPreparation member.
typedef struct TagFeatureSearchPreparation{
SScreenBlockFeatureStorage* pRefBlockFeature;//points to the ref frame storage
uint16_t* pFeatureOfBlock; // Feature of every block (8x8), begin with the point
uint8_t uiFeatureStrategyIndex;// index of hash strategy
/* for FME frame-level switch */
bool bFMESwitchFlag; // result of CalcFMESwitchFlag() on P slices; reset to true on other slices
uint8_t uiFMEGoodFrameCount; // (re)initialized to FME_DEFAULT_GOOD_FRAME_NUM
int32_t iHighFreMbCount; // zeroed at per-frame preprocessing; presumably a count of high-frequency MBs -- TODO confirm
}SFeatureSearchPreparation;//maintain only one
/*
* Reconstructed Picture definition
* It is used to express reference picture, also consequent reconstruction picture for output

View File

@ -56,7 +56,19 @@ namespace WelsSVCEnc {
///////////////////////////////////DQ Layer level///////////////////////////////////
typedef struct TagDqLayer SDqLayer;
typedef SDqLayer* pDqLayer;
typedef SDqLayer* pDqLayer;
// Working state for the FME search; an instance is referenced from SDqLayer
// through its pFeatureSearchPreparation member.
typedef struct TagFeatureSearchPreparation{
SScreenBlockFeatureStorage* pRefBlockFeature;//points to the ref frame storage
uint16_t* pFeatureOfBlock; // Feature of every block (8x8), begin with the point
uint8_t uiFeatureStrategyIndex;// index of hash strategy
/* for FME frame-level switch */
bool bFMESwitchFlag; // result of CalcFMESwitchFlag() on P slices; reset to true on other slices
uint8_t uiFMEGoodFrameCount; // (re)initialized to FME_DEFAULT_GOOD_FRAME_NUM
int32_t iHighFreMbCount; // zeroed at per-frame preprocessing; presumably a count of high-frequency MBs -- TODO confirm
}SFeatureSearchPreparation;//maintain only one
typedef struct TagLayerInfo {
SNalUnitHeaderExt sNalHeaderExt;
@ -98,6 +110,8 @@ struct TagDqLayer {
int32_t* pLastCodedMbIdxOfPartition; // for dynamic slicing mode
int32_t* pLastMbIdxOfPartition; // for dynamic slicing mode
SFeatureSearchPreparation* pFeatureSearchPreparation;
SDqLayer* pRefLayer; // pointer to referencing dq_layer of current layer to be decoded
};

View File

@ -222,6 +222,8 @@ void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SDqLayer* pCurLayer, SW
#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
#define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2
#define FMESWITCH_MBSAD_THRESHOLD 30 // empirically set.
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
void SumOf8x8BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
@ -231,6 +233,13 @@ void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const
int32_t RequestScreenBlockFeatureStorage( CMemoryAlign *pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage );
int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
SFeatureSearchPreparation* pFeatureSearchPreparation);
int32_t ReleaseFeatureSearchPreparation( CMemoryAlign *pMa, uint16_t*& pFeatureOfBlock);
#define FME_DEFAULT_GOOD_FRAME_NUM (2)
#define FME_DEFAULT_FEATURE_INDEX (0)
void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
//inline functions
inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY,
const int32_t kiMaxMvRange,
@ -251,5 +260,14 @@ inline bool CheckMvInRange( const SMVUnitXY ksCurrentMv, const SMVUnitXY ksMinMv
return (CheckMvInRange(ksCurrentMv.iMvX, ksMinMv.iMvX, ksMaxMv.iMvX)
&& CheckMvInRange(ksCurrentMv.iMvY, ksMinMv.iMvY, ksMaxMv.iMvY));
}
//FME switch related
// Decides the frame-level FME switch.
//   uiFMEGoodFrameCount  - FME "good frame" counter kept in SFeatureSearchPreparation
//   iHighFreMbPrecentage - currently unused, see the TODO below
//   iAvgMbSAD            - average SAD per macroblock of the frame
//   bScrollingDetected   - scroll-detection result for the frame
// Returns true when scrolling was detected, or when the good-frame counter is
// non-zero and the average MB SAD exceeds FMESWITCH_MBSAD_THRESHOLD.
inline bool CalcFMESwitchFlag(const uint8_t uiFMEGoodFrameCount, const int32_t iHighFreMbPrecentage,
const int32_t iAvgMbSAD, const bool bScrollingDetected ) {
  // Detected scrolling alone is enough to switch FME on.
  if (bScrollingDetected) {
    return true;
  }
  const bool kbHasGoodFrame = (uiFMEGoodFrameCount > 0);
  const bool kbSadAboveThreshold = (iAvgMbSAD > FMESWITCH_MBSAD_THRESHOLD);
  return kbHasGoodFrame && kbSadAboveThreshold;
  //TODO: add the logic of iHighFreMbPrecentage
  //return ( iHighFreMbPrecentage > 2
  //  && ( bScrollingDetected || iHighFreMbPrecentage >15
  //    ||( uiFMEGoodFrameCount>0 && iFrameSAD > FMESWITCH_FRAMESAD_THRESHOLD ) ) );
}
}
#endif

View File

@ -708,7 +708,7 @@ static inline int32_t InitDqLayers (sWelsEncCtx** ppCtx) {
iDlayerCount = pParam->iSpatialLayerNum;
iNumRef = pParam->iNumRefFrame;
const int32_t kiFeatureStrategyIndex = 0;
const int32_t kiFeatureStrategyIndex = FME_DEFAULT_FEATURE_INDEX;
const int32_t kiMe16x16 = ME_DIA_CROSS;
const int32_t kiMe8x8 = ME_DIA_CROSS_FME;
const int32_t kiNeedFeatureStorage = (pParam->iUsageType != SCREEN_CONTENT_REAL_TIME)?0:
@ -733,7 +733,7 @@ static inline int32_t InitDqLayers (sWelsEncCtx** ppCtx) {
pRefList = (SRefList*)pMa->WelsMallocz (sizeof (SRefList), "pRefList");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pRefList), FreeMemorySvc (ppCtx))
do {
pRefList->pRef[i] = AllocPicture (pMa, kiWidth, kiHeight, true, kiNeedFeatureStorage); // to use actual size of current layer
pRefList->pRef[i] = AllocPicture (pMa, kiWidth, kiHeight, true, (iDlayerIndex == iDlayerCount-1)?kiNeedFeatureStorage:0); // to use actual size of current layer
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pRefList->pRef[i]), FreeMemorySvc (ppCtx))
++ i;
} while (i < 1 + iNumRef);
@ -830,6 +830,18 @@ static inline int32_t InitDqLayers (sWelsEncCtx** ppCtx) {
}
}
//
if (kiNeedFeatureStorage && iDlayerIndex==iDlayerCount-1)
{
pDqLayer->pFeatureSearchPreparation = static_cast<SFeatureSearchPreparation*> (pMa->WelsMallocz (sizeof (SFeatureSearchPreparation), "pFeatureSearchPreparation"));
WELS_VERIFY_RETURN_PROC_IF (1, NULL==pDqLayer->pFeatureSearchPreparation, FreeMemorySvc (ppCtx));
int32_t iReturn = RequestFeatureSearchPreparation(pMa, pDlayer->iFrameWidth, pDlayer->iFrameHeight, kiNeedFeatureStorage,
pDqLayer->pFeatureSearchPreparation);
WELS_VERIFY_RETURN_PROC_IF (1, ENC_RETURN_SUCCESS!=iReturn, FreeMemorySvc (ppCtx));
} else {
pDqLayer->pFeatureSearchPreparation = NULL;
}
(*ppCtx)->ppDqLayerList[iDlayerIndex] = pDqLayer;
++ iDlayerIndex;
@ -1572,6 +1584,12 @@ void FreeMemorySvc (sWelsEncCtx** ppCtx) {
pDq->pLastMbIdxOfPartition = NULL;
}
if (pDq->pFeatureSearchPreparation) {
ReleaseFeatureSearchPreparation(pMa, pDq->pFeatureSearchPreparation->pFeatureOfBlock);
pMa->WelsFree (pDq->pFeatureSearchPreparation, "pFeatureSearchPreparation");
pDq->pFeatureSearchPreparation = NULL;
}
pMa->WelsFree (pDq, "pDq");
pDq = NULL;
pCtx->ppDqLayerList[ilayer] = NULL;
@ -2427,8 +2445,41 @@ void PreprocessSliceCoding (sWelsEncCtx* pCtx) {
pFuncList->pfCalculateSatd = CalculateSatdCost;
pFuncList->pfInterFineMd = WelsMdInterFinePartition;
}
}
//to init at each frame will be needed when dealing with hybrid content (camera+screen)
if (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) {
SFeatureSearchPreparation* pFeatureSearchPreparation = pCurLayer->pFeatureSearchPreparation;
if (pFeatureSearchPreparation) {
pFeatureSearchPreparation->iHighFreMbCount = 0;
if (P_SLICE == pCtx->eSliceType) {
//calculate bFMESwitchFlag
SVAAFrameInfoExt *pVaaExt = static_cast<SVAAFrameInfoExt *>(pCtx->pVaa);
const int32_t kiMbSize = pCurLayer->iMbHeight*pCurLayer->iMbWidth;
pFeatureSearchPreparation->bFMESwitchFlag = CalcFMESwitchFlag( pFeatureSearchPreparation->uiFMEGoodFrameCount,
pFeatureSearchPreparation->iHighFreMbCount*100/kiMbSize, pCtx->pVaa->sVaaCalcInfo.iFrameSad/kiMbSize,
pVaaExt->sScrollDetectInfo.bScrollDetectFlag);
//PerformFMEPreprocess
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage = pCurLayer->pRefPic->pScreenBlockFeatureStorage;
pFeatureSearchPreparation->pRefBlockFeature = pScreenBlockFeatureStorage;
if (pFeatureSearchPreparation->bFMESwitchFlag
&& !pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
pScreenBlockFeatureStorage->pFeatureOfBlockPointer = pFeatureSearchPreparation->pFeatureOfBlock;
PerformFMEPreprocess( pFuncList, pCurLayer->pRefPic, pScreenBlockFeatureStorage );
}
//assign ME pointer
if (pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
//TBC int32_t iIs16x16 = pScreenBlockFeatureStorage->iIs16x16;
}
} else {
//reset some status when at I_SLICE
pFeatureSearchPreparation->bFMESwitchFlag = true;
pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM;
}
}
}
}

View File

@ -113,7 +113,6 @@ SPicture* AllocPicture (CMemoryAlign* pMa, const int32_t kiWidth , const int32_t
pPic->pScreenBlockFeatureStorage = static_cast<SScreenBlockFeatureStorage*> (pMa->WelsMallocz (sizeof (SScreenBlockFeatureStorage), "pScreenBlockFeatureStorage"));
int32_t iReturn = RequestScreenBlockFeatureStorage(pMa, kiWidth, kiHeight, iNeedFeatureStorage,
pPic->pScreenBlockFeatureStorage );
WELS_VERIFY_RETURN_PROC_IF (NULL, ENC_RETURN_SUCCESS != iReturn, FreePicture (pMa, &pPic));
} else {
pPic->pScreenBlockFeatureStorage = NULL;

View File

@ -45,6 +45,18 @@
namespace WelsSVCEnc {
// Quantization step size scaled by 16 (QStep<<4), one entry per QP in 0..51.
// Each row of six entries is exactly double the row before it.
// PerformFMEPreprocess() indexes this table with the reference picture's
// average QP to derive its SAD-cost thresholds.
const int32_t QStepx16ByQp[52] = { /* save QStep<<4 for int32_t */
10, 11, 13, 14, 16, 18, /* 0~5 */
20, 22, 26, 28, 32, 36, /* 6~11 */
40, 44, 52, 56, 64, 72, /* 12~17 */
80, 88, 104, 112, 128, 144, /* 18~23 */
160, 176, 208, 224, 256, 288, /* 24~29 */
320, 352, 416, 448, 512, 576, /* 30~35 */
640, 704, 832, 896, 1024, 1152, /* 36~41 */
1280, 1408, 1664, 1792, 2048, 2304, /* 42~47 */
2560, 2816, 3328, 3584 /* 48~51 */
};
static inline void UpdateMeResults( const SMVUnitXY ksBestMv, const uint32_t kiBestSadCost, uint8_t* pRef, SWelsME * pMe )
{
pMe->sMv = ksBestMv;
@ -313,9 +325,9 @@ bool CheckDirectionalMvFalse(PSampleSadSatdCostFunc pSad, void * vpMe,
#if defined (X86_ASM)
void CalcMvdCostx8_c( uint16_t *pMvdCost, const int32_t kiStartMv, uint16_t* pMvdTable, const uint16_t kiFixedCost )
{
uint16_t *pBaseCost = pMvdCost;
const int32_t kiOffset = (kiStartMv<<2);
uint16_t *pMvd = pMvdTable+kiOffset;
uint16_t *pBaseCost = pMvdCost;
const int32_t kiOffset = (kiStartMv<<2);
uint16_t *pMvd = pMvdTable+kiOffset;
for (int32_t i = 0; i < 8; ++ i) {
pBaseCost[i] = ((*pMvd) + kiFixedCost);
pMvd += 4;
@ -327,58 +339,58 @@ void VerticalFullSearchUsingSSE41( void *pFunc, void *vpMe,
const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch ) {
SWelsFuncPtrList *pFuncList = static_cast<SWelsFuncPtrList *>(pFunc);
SWelsME *pMe = static_cast<SWelsME *>(vpMe);
uint8_t* kpEncMb = pMe->pEncMb;
const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
uint8_t* pRef = &pMe->pColoRefMb[(kiMinPos - kiCurMeBlockPix)*kiRefStride];
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
SWelsME *pMe = static_cast<SWelsME *>(vpMe);
uint8_t* kpEncMb = pMe->pEncMb;
const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
uint8_t* pRef = &pMe->pColoRefMb[(kiMinPos - kiCurMeBlockPix)*kiRefStride];
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
PTransposeMatrixBlockFunc TransposeMatrixBlock = kIsBlock16x16 ? TransposeMatrixBlock16x16_sse2 : TransposeMatrixBlock8x8_mmx;
PTransposeMatrixBlocksFunc TransposeMatrixBlocks= kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 : TransposeMatrixBlocksx8_mmx;
PTransposeMatrixBlockFunc TransposeMatrixBlock = kIsBlock16x16 ? TransposeMatrixBlock16x16_sse2 : TransposeMatrixBlock8x8_mmx;
PTransposeMatrixBlocksFunc TransposeMatrixBlocks= kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 : TransposeMatrixBlocksx8_mmx;
const int32_t kiDiff = kiMaxPos - kiMinPos;
const int32_t kiRowNum = WELS_ALIGN((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
const int32_t kiBlocksNum = kIsBlock16x16 ? (kiRowNum>>4) : (kiRowNum>>3);
int32_t iCountLoop8 = (kiRowNum-kiEdgeBlocks) >> 3;
const int32_t kiRemainingVectors = kiDiff - (iCountLoop8<<3);
const int32_t kiMatrixStride = MAX_VERTICAL_MV_RANGE;
ENFORCE_STACK_ALIGN_2D( uint8_t, uiMatrixRef, 16, kiMatrixStride, 16 ); // transpose matrix result for ref
ENFORCE_STACK_ALIGN_2D( uint8_t, uiMatrixEnc, 16, 16, 16 ); // transpose matrix result for enc
assert(kiRowNum <= kiMatrixStride); // make sure effective memory
const int32_t kiDiff = kiMaxPos - kiMinPos;
const int32_t kiRowNum = WELS_ALIGN((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
const int32_t kiBlocksNum = kIsBlock16x16 ? (kiRowNum>>4) : (kiRowNum>>3);
int32_t iCountLoop8 = (kiRowNum-kiEdgeBlocks) >> 3;
const int32_t kiRemainingVectors = kiDiff - (iCountLoop8<<3);
const int32_t kiMatrixStride = MAX_VERTICAL_MV_RANGE;
ENFORCE_STACK_ALIGN_2D( uint8_t, uiMatrixRef, 16, kiMatrixStride, 16 ); // transpose matrix result for ref
ENFORCE_STACK_ALIGN_2D( uint8_t, uiMatrixEnc, 16, 16, 16 ); // transpose matrix result for enc
assert(kiRowNum <= kiMatrixStride); // make sure effective memory
TransposeMatrixBlock( &uiMatrixEnc[0][0], 16, kpEncMb, kiEncStride );
TransposeMatrixBlocks( &uiMatrixRef[0][0], kiMatrixStride, pRef, kiRefStride, kiBlocksNum );
ENFORCE_STACK_ALIGN_1D( uint16_t, uiBaseCost, 8, 16 );
int32_t iTargetPos = kiMinPos;
int16_t iBestPos = pMe->sMv.iMvX;
uint32_t uiBestCost = pMe->uiSadCost;
int32_t iTargetPos = kiMinPos;
int16_t iBestPos = pMe->sMv.iMvX;
uint32_t uiBestCost = pMe->uiSadCost;
uint32_t uiCostMin;
int32_t iIndexMinPos;
kpEncMb = &uiMatrixEnc[0][0];
pRef = &uiMatrixRef[0][0];
kpEncMb = &uiMatrixEnc[0][0];
pRef = &uiMatrixRef[0][0];
while(iCountLoop8 > 0) {
CalcMvdCostx8_c(uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
uiCostMin = pSampleSadHor8( kpEncMb, 16, pRef, kiMatrixStride, uiBaseCost, &iIndexMinPos );
if (uiCostMin < uiBestCost) {
uiBestCost = uiCostMin;
iBestPos = iTargetPos+iIndexMinPos;
uiBestCost = uiCostMin;
iBestPos = iTargetPos+iIndexMinPos;
}
iTargetPos += 8;
iTargetPos += 8;
pRef += 8;
-- iCountLoop8;
}
if (kiRemainingVectors > 0) {
kpEncMb = pMe->pEncMb;
pRef = &pMe->pColoRefMb[(iTargetPos - kiCurMeBlockPix)*kiRefStride];
kpEncMb = pMe->pEncMb;
pRef = &pMe->pColoRefMb[(iTargetPos - kiCurMeBlockPix)*kiRefStride];
while (iTargetPos < kiMaxPos) {
const uint16_t pMvdCost = pMvdTable[iTargetPos<<2];
uint32_t uiSadCost = pSad( kpEncMb, kiEncStride, pRef, kiRefStride ) + (kiFixedMvd + pMvdCost);
const uint16_t pMvdCost = pMvdTable[iTargetPos<<2];
uint32_t uiSadCost = pSad( kpEncMb, kiEncStride, pRef, kiRefStride ) + (kiFixedMvd + pMvdCost);
if (uiSadCost < uiBestCost) {
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
}
pRef += kiRefStride;
++iTargetPos;
@ -399,20 +411,20 @@ void HorizontalFullSearchUsingSSE41( void *pFunc, void *vpMe,
const bool bVerticalSearch )
{
SWelsFuncPtrList *pFuncList = static_cast<SWelsFuncPtrList *>(pFunc);
SWelsME *pMe = static_cast<SWelsME *>(vpMe);
uint8_t *kpEncMb = pMe->pEncMb;
const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixX;
uint8_t *pRef = &pMe->pColoRefMb[kiMinPos - kiCurMeBlockPix];
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
SWelsME *pMe = static_cast<SWelsME *>(vpMe);
uint8_t *kpEncMb = pMe->pEncMb;
const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixX;
uint8_t *pRef = &pMe->pColoRefMb[kiMinPos - kiCurMeBlockPix];
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
ENFORCE_STACK_ALIGN_1D( uint16_t, uiBaseCost, 8, 16 );
const int32_t kiNumVector = kiMaxPos - kiMinPos;
int32_t iCountLoop8 = kiNumVector >> 3;
const int32_t kiRemainingLoop8 = kiNumVector & 7;
int32_t iTargetPos = kiMinPos;
int16_t iBestPos = pMe->sMv.iMvX;
uint32_t uiBestCost = pMe->uiSadCost;
const int32_t kiNumVector = kiMaxPos - kiMinPos;
int32_t iCountLoop8 = kiNumVector >> 3;
const int32_t kiRemainingLoop8 = kiNumVector & 7;
int32_t iTargetPos = kiMinPos;
int16_t iBestPos = pMe->sMv.iMvX;
uint32_t uiBestCost = pMe->uiSadCost;
uint32_t uiCostMin;
int32_t iIndexMinPos;
@ -420,20 +432,20 @@ void HorizontalFullSearchUsingSSE41( void *pFunc, void *vpMe,
CalcMvdCostx8_c(uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
uiCostMin = pSampleSadHor8( kpEncMb, kiEncStride, pRef, kiRefStride, uiBaseCost, &iIndexMinPos );
if (uiCostMin < uiBestCost) {
uiBestCost = uiCostMin;
iBestPos = iTargetPos+iIndexMinPos;
uiBestCost = uiCostMin;
iBestPos = iTargetPos+iIndexMinPos;
}
iTargetPos += 8;
iTargetPos += 8;
pRef += 8;
-- iCountLoop8;
}
if ( kiRemainingLoop8 > 0 ) {
while (iTargetPos < kiMaxPos) {
const uint16_t pMvdCost = pMvdTable[iTargetPos<<2];
uint32_t uiSadCost = pSad( kpEncMb, kiEncStride, pRef, kiRefStride ) + (kiFixedMvd + pMvdCost);
const uint16_t pMvdCost = pMvdTable[iTargetPos<<2];
uint32_t uiSadCost = pSad( kpEncMb, kiEncStride, pRef, kiRefStride ) + (kiFixedMvd + pMvdCost);
if (uiSadCost < uiBestCost) {
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
}
++pRef;
++iTargetPos;
@ -447,10 +459,10 @@ void HorizontalFullSearchUsingSSE41( void *pFunc, void *vpMe,
}
}
#endif
void LineFullSearch_c( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
void LineFullSearch_c( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch ) {
SWelsFuncPtrList *pFuncList = static_cast<SWelsFuncPtrList *>(pFunc);
SWelsME *pMe = static_cast<SWelsME *>(vpMe);
@ -482,9 +494,9 @@ void LineFullSearch_c( void *pFunc, void *vpMe,
}
void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SWelsME * pMe,
const SSlice* pSlice, const int32_t kiEncStride, const int32_t kiRefStride) {
PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
const SSlice* pSlice, const int32_t kiEncStride, const int32_t kiRefStride) {
PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
const int32_t iCurMeBlockQpelPixX = ((iCurMeBlockPixX)<<2);
@ -515,9 +527,10 @@ void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SWelsME * pMe,
// Feature Search Basics
/////////////////////////
//memory related
int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFeatureStrategyIndex,
const int32_t kiFrameWidth, const int32_t kiFrameHeight, const bool bFme8x8,
uint16_t*& pFeatureOfBlock) {
int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
SFeatureSearchPreparation* pFeatureSearchPreparation) {
const int32_t kiFeatureStrategyIndex = iNeedFeatureStorage>>16;
const bool bFme8x8 = ((iNeedFeatureStorage & 0x0000FF & ME_FME)==ME_FME);
const int32_t kiMarginSize = bFme8x8?8:16;
const int32_t kiFrameSize = (kiFrameWidth-kiMarginSize) * (kiFrameHeight-kiMarginSize);
int32_t iListOfFeatureOfBlock;
@ -528,9 +541,14 @@ int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFeat
iListOfFeatureOfBlock = sizeof(uint16_t) * kiFrameSize +
(kiFrameWidth-kiMarginSize) * sizeof(uint32_t) + kiFrameWidth * 8 * sizeof(uint8_t);
}
pFeatureOfBlock =
pFeatureSearchPreparation->pFeatureOfBlock =
(uint16_t *)pMa->WelsMalloc(iListOfFeatureOfBlock, "pFeatureOfBlock");
WELS_VERIFY_RETURN_IF(ENC_RETURN_MEMALLOCERR, NULL == pFeatureOfBlock)
WELS_VERIFY_RETURN_IF(ENC_RETURN_MEMALLOCERR, NULL == (pFeatureSearchPreparation->pFeatureOfBlock) )
pFeatureSearchPreparation->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
pFeatureSearchPreparation->bFMESwitchFlag = true;
pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM;
pFeatureSearchPreparation->iHighFreMbCount = 0;
return ENC_RETURN_SUCCESS;
}
@ -568,7 +586,13 @@ int32_t RequestScreenBlockFeatureStorage( CMemoryAlign *pMa, const int32_t kiFra
pScreenBlockFeatureStorage->pLocationPointer = (uint16_t*)pMa->WelsMalloc(2*kiFrameSize*sizeof(uint16_t), "pScreenBlockFeatureStorage->pLocationPointer");
WELS_VERIFY_RETURN_IF(ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationPointer)
pScreenBlockFeatureStorage->iActualListSize = kiListSize;
pScreenBlockFeatureStorage->pFeatureOfBlockPointer = NULL;
pScreenBlockFeatureStorage->iIs16x16 = !bIsBlock8x8;
pScreenBlockFeatureStorage->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
pScreenBlockFeatureStorage->iActualListSize = kiListSize;
memset(pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL*sizeof(uint32_t));
pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = false;
return ENC_RETURN_SUCCESS;
}
int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage ) {
@ -588,22 +612,18 @@ int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeature
}
//preprocess related
// Returns the sum of the 64 samples of one 8x8 block.
//   pRef        - pointer to the top-left sample of the block
//   kiRefStride - distance, in samples, between the starts of consecutive rows
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride)
{
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride) {
int32_t iSum = 0, i;
for(i = 0; i < 8; i++)
{
for(i = 0; i < 8; i++) {
// accumulate the eight samples of the current row
iSum += pRef[0] + pRef[1] + pRef[2] + pRef[3];
iSum += pRef[4] + pRef[5] + pRef[6] + pRef[7];
// step to the same column of the next row
pRef += kiRefStride;
}
return iSum;
}
int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride)
{
int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride) {
int32_t iSum = 0, i;
for(i = 0; i < 16; i++)
{
for(i = 0; i < 16; i++) {
iSum += pRef[0] + pRef[1] + pRef[2] + pRef[3];
iSum += pRef[4] + pRef[5] + pRef[6] + pRef[7];
iSum += pRef[8] + pRef[9] + pRef[10] + pRef[11];
@ -681,6 +701,7 @@ void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t
pSrcPointer += kiWidth;
}
}
void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
{
@ -710,10 +731,17 @@ void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef,
}
// Runs the FME preprocess for one reference picture:
//   1. fills the block-feature storage through CalculateFeatureOfBlock(),
//   2. marks the storage as calculated,
//   3. derives the per-block-size SAD-cost thresholds from the reference
//      picture's average QP.
//   pFunc                      - function pointer list forwarded to CalculateFeatureOfBlock()
//   pRef                       - reference picture; its iFrameAverageQp is read here
//   pScreenBlockFeatureStorage - storage that receives the flag and the thresholds
void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
{
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
CalculateFeatureOfBlock(pFunc, pRef, pScreenBlockFeatureStorage );
pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = true;
// WelsMedian(0, qp, 51) presumably clamps the average QP into the valid
// index range of QStepx16ByQp -- TODO confirm against WelsMedian's definition
uint32_t uiRefPictureAvgQstepx16 = QStepx16ByQp[WelsMedian(0, pRef->iFrameAverageQp, 51)];
// 16x16 threshold = 30 * (QStepx16 + 160) / 8; the 8x8 threshold is a quarter of it
uint32_t uiSadCostThreshold16x16 = ((30 * (uiRefPictureAvgQstepx16 + 160))>>3);
pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x16] = uiSadCostThreshold16x16;
pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x8] = (uiSadCostThreshold16x16>>2);
// the remaining block sizes get the maximum value as their threshold
pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x8]
= pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x16]
= pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_4x4] = UINT_MAX;
}
//search related