Merge pull request #621 from sijchen/fme_merge42

[Encoder ME] add Preprocess functions for FME
This commit is contained in:
ruil2 2014-04-04 09:48:44 +08:00
commit 646d943200
4 changed files with 346 additions and 185 deletions

View File

@ -42,11 +42,16 @@ namespace WelsSVCEnc {
#define LIST_SIZE 0x10000 //(256*256)
// Per-reference-picture storage for the feature-based motion search (FME):
// holds the per-position block features plus a table that groups positions by feature value.
typedef struct TagScreenBlockFeatureStorage
{
//Input
uint16_t* pFeatureOfBlockPointer; // Pointer to pFeatureOfBlock (one uint16_t feature per block position)
int32_t iIs16x16; //Feature block size: non-zero selects 16x16 blocks, zero selects 8x8 blocks
uint8_t uiFeatureStrategyIndex;// index of hash strategy
//Modify
uint32_t* pTimesOfFeatureValue; // times of every value in Feature, i.e. pTimesOfFeatureValue[v] counts the positions whose feature equals v
uint16_t** pLocationOfFeature; // uint16_t *pLocationOfFeature[LIST_SIZE], pLocationOfFeature[i] saves all the location(x,y) whose Feature = i;
uint16_t* pLocationPointer; // buffer of position array (backing storage that the pLocationOfFeature[i] entries point into)
int32_t iActualListSize; // actual list size (number of entries of pTimesOfFeatureValue / pLocationOfFeature in use)
uint32_t uiSadCostThreshold[BLOCK_SIZE_ALL]; // NOTE(review): presumably one SAD cost threshold per block size -- confirm against the users of this field
bool bRefBlockFeatureCalculated; // flag of whether pre-process is done
} SScreenBlockFeatureStorage; //should be stored with RefPic, one for each frame

View File

@ -193,6 +193,7 @@ bool CheckDirectionalMvFalse(PSampleSadSatdCostFunc pSad, void * vpMe,
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
int32_t& iBestSadCost);
// Cross Search Basics
void LineFullSearch_c( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
@ -205,6 +206,17 @@ void VerticalFullSearchUsingSSE41( void *pFunc, void *vpMe,
const bool bVerticalSearch );
void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SDqLayer* pCurLayer, SWelsME * pMe, const SSlice* pSlice);
// Feature Search Basics
// List sizes: number of distinct feature values each strategy can produce.
#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
#define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2
// Sum of all samples of the 8x8 block whose top-left sample is pRef[0]; rows are kiRefStride apart.
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
// Sum of all samples of the 16x16 block whose top-left sample is pRef[0]; rows are kiRefStride apart.
int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
// For every position (x,y) in kiWidth x kiHeight: stores the 8x8 block sum in pFeatureOfBlock[y*kiWidth+x]
// and increments pTimesOfFeatureValue[sum].
void SumOf8x8BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
// Same as SumOf8x8BlockOfFrame_c, using 16x16 block sums.
void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
//inline functions
inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY,
const int32_t kiMaxMvRange,
SMVUnitXY* pMvMin, SMVUnitXY* pMvMax)

View File

@ -146,6 +146,9 @@ typedef void (*PLineFullSearchFunc) ( void *pFunc, void *vpMe,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch );
// Computes the feature of every block position of a kiWidth x kiHeight area of pRef:
// writes one feature per position to pFeatureOfBlock and counts each value in pTimesOfFeatureValue.
typedef void (*PCalculateBlockFeatureOfFrame)(uint8_t *pRef, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
// Computes and returns the feature of the single block whose top-left sample is pRef[0].
typedef int32_t (*PCalculateSingleBlockFeature)(uint8_t *pRef, const int32_t kiRefStride);
#define MAX_BLOCK_TYPE 5 // prev 7
typedef struct TagSampleDealingFunc {
@ -205,6 +208,8 @@ struct TagWelsFuncPointerList {
PCalculateSatdFunc pfCalculateSatd;
PCheckDirectionalMv pfCheckDirectionalMv;
PLineFullSearchFunc pfLineFullSearch;
PCalculateBlockFeatureOfFrame pfCalculateBlockFeatureOfFrame[2];//0 - for 8x8, 1 for 16x16
PCalculateSingleBlockFeature pfCalculateSingleBlockFeature[2];//0 - for 8x8, 1 for 16x16
PCopyFunc pfCopy16x16Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
PCopyFunc pfCopy16x16NotAligned; //md.c

View File

@ -39,6 +39,7 @@
*/
#include "cpu_core.h"
#include "ls_defines.h"
#include "svc_motion_estimate.h"
namespace WelsSVCEnc {
@ -67,6 +68,13 @@ void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScre
pFuncList->pfLineFullSearch = LineFullSearch_c;
if ( uiCpuFlag & WELS_CPU_SSE41 ) {
}
//for feature search
pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_c;
//TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
}
}
@ -440,13 +448,144 @@ int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeature
}
return ENC_RETURN_UNEXPECTED;
}
//preprocess related
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride)
{
int32_t iSum = 0, i;
for(i = 0; i < 8; i++)
{
iSum += pRef[0] + pRef[1] + pRef[2] + pRef[3];
iSum += pRef[4] + pRef[5] + pRef[6] + pRef[7];
pRef += kiRefStride;
}
return iSum;
}
// Returns the sum of the 256 samples of the 16x16 block whose top-left sample is pRef[0].
// Consecutive rows of the block are kiRefStride bytes apart.
int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride)
{
  const int32_t kiBlockSize = 16;
  int32_t iTotal = 0;
  const uint8_t* pRow = pRef;
  for (int32_t iRow = 0; iRow < kiBlockSize; ++iRow) {
    for (int32_t iCol = 0; iCol < kiBlockSize; ++iCol) {
      iTotal += pRow[iCol];
    }
    pRow += kiRefStride;
  }
  return iTotal;
}
// Computes the 8x8 block sum at every position (x, y) with 0 <= x < kiWidth and 0 <= y < kiHeight.
// The sum of the block whose top-left sample is at (x, y) is stored (narrowed to uint16_t) in
// pFeatureOfBlock[y * kiWidth + x], and pTimesOfFeatureValue[sum] is incremented.
// The counters in pTimesOfFeatureValue are not cleared here.
// Each block read covers 8 columns and 8 rows starting at its position, so samples up to
// 7 columns / 7 rows beyond the kiWidth x kiHeight area are accessed.
void SumOf8x8BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[])
{
  for (int32_t iRow = 0; iRow < kiHeight; ++iRow) {
    uint8_t* const pRowStart = pRefPicture + kiRefStride * iRow;
    uint16_t* const pFeatureRow = pFeatureOfBlock + kiWidth * iRow;
    for (int32_t iCol = 0; iCol < kiWidth; ++iCol) {
      const int32_t kiBlockSum = SumOf8x8SingleBlock_c(pRowStart + iCol, kiRefStride);
      pFeatureRow[iCol] = kiBlockSum;
      ++pTimesOfFeatureValue[kiBlockSum];
    }
  }
}
// Computes the 16x16 block sum at every position (x, y) with 0 <= x < kiWidth and 0 <= y < kiHeight.
// The sum of the block whose top-left sample is at (x, y) is stored (narrowed to uint16_t) in
// pFeatureOfBlock[y * kiWidth + x], and pTimesOfFeatureValue[sum] is incremented.
// The counters in pTimesOfFeatureValue are not cleared here.
// Each block read covers 16 columns and 16 rows starting at its position, so samples up to
// 15 columns / 15 rows beyond the kiWidth x kiHeight area are accessed.
//TODO: this is similar to SumOf8x8BlockOfFrame_c except for the single-block function called, refactor-able?
void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[])
{
  for (int32_t iRow = 0; iRow < kiHeight; ++iRow) {
    uint8_t* const pRowStart = pRefPicture + kiRefStride * iRow;
    uint16_t* const pFeatureRow = pFeatureOfBlock + kiWidth * iRow;
    for (int32_t iCol = 0; iCol < kiWidth; ++iCol) {
      const int32_t kiBlockSum = SumOf16x16SingleBlock_c(pRowStart + iCol, kiRefStride);
      pFeatureRow[iCol] = kiBlockSum;
      ++pTimesOfFeatureValue[kiBlockSum];
    }
  }
}
// Carves pBuf into one consecutive region per feature value.
// For each i in [0, kiListSize), both pLocationOfFeature[i] and pFeatureValuePointerList[i]
// are set to the start of region i, and region i spans 2 * pTimesOfFeatureValue[i]
// uint16_t elements (two uint16_t slots per counted occurrence).
void InitializeHashforFeature_c( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList )
{
  uint16_t* pRegionStart = pBuf;
  int32_t iFeature = 0;
  while (iFeature < kiListSize) {
    pFeatureValuePointerList[iFeature] = pRegionStart;
    pLocationOfFeature[iFeature] = pRegionStart;
    // advance past this feature's region: two slots per occurrence
    pRegionStart += (pTimesOfFeatureValue[iFeature] << 1);
    ++iFeature;
  }
}
// Records the location of every block position in the list that belongs to its feature value.
// For each (x, y) in the kiWidth x kiHeight feature map, a 32-bit word ((y*4) << 16) | (x*4)
// is stored with ST32 at the current write cursor pFeatureValuePointerList[feature], and that
// cursor is then advanced by two uint16_t elements.
// The cursors in pFeatureValuePointerList are therefore modified by this call.
void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
uint16_t** pFeatureValuePointerList )
{
  uint16_t* pFeatureRow = pFeatureOfBlock;
  for (int32_t iRow = 0, iRowTimes4 = 0; iRow < kiHeight; ++iRow, iRowTimes4 += 4, pFeatureRow += kiWidth) {
    for (int32_t iCol = 0; iCol < kiWidth; ++iCol) {
      const uint16_t kuiFeature = pFeatureRow[iCol];
      uint16_t* pWriteCursor = pFeatureValuePointerList[kuiFeature];
      ST32( &pWriteCursor[0], ((iRowTimes4<<16)|(iCol<<2)) );
      pFeatureValuePointerList[kuiFeature] = pWriteCursor + 2;
    }
  }
}
// Builds the block-feature tables of reference picture pRef inside pScreenBlockFeatureStorage:
//  1. clears the histogram pTimesOfFeatureValue (iActualListSize entries);
//  2. computes the feature of every block position of the luma plane (pData[0]) with the
//     8x8 or 16x16 routine selected by iIs16x16, filling pFeatureOfBlockPointer and the histogram;
//  3. splits pLocationPointer into one region per feature value (pLocationOfFeature[i]);
//  4. writes the location of every block position into the region of its feature value.
// pFunc supplies the per-frame feature routine via pfCalculateBlockFeatureOfFrame.
void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
{
  uint16_t* pFeatureOfBlock = pScreenBlockFeatureStorage->pFeatureOfBlockPointer;
  uint32_t* pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue;
  uint16_t** pLocationOfFeature = pScreenBlockFeatureStorage->pLocationOfFeature;
  uint16_t* pBuf = pScreenBlockFeatureStorage->pLocationPointer;

  uint8_t* pRefData = pRef->pData[0];
  const int32_t kiRefStride = pRef->iLineSize[0];

  // pfCalculateBlockFeatureOfFrame has exactly two entries (0: 8x8, 1: 16x16), so collapse
  // any non-zero flag value to 1 before using it as an index.
  const int32_t kiIs16x16 = (pScreenBlockFeatureStorage->iIs16x16 ? 1 : 0);
  const int32_t kiEdgeDiscard = (kiIs16x16 ? 16 : 8);//this is to save complexity of padding on pRef
  const int32_t kiWidth = pRef->iWidthInPixel - kiEdgeDiscard;
  const int32_t kiHeight = pRef->iHeightInPixel - kiEdgeDiscard;
  const int32_t kiActualListSize = pScreenBlockFeatureStorage->iActualListSize;

  // Scratch write cursors, one per feature value; advanced while the locations are filled in.
  // NOTE(review): this table holds LIST_SIZE_SUM_16x16 (0xFF01) pointers on the stack, which is
  // several hundred KB on common targets -- consider moving it into the feature storage.
  uint16_t* pFeatureValuePointerList[WELS_MAX(LIST_SIZE_SUM_16x16,LIST_SIZE_MSE_16x16)] = {0};
  const int32_t kiTableCapacity = static_cast<int32_t>(sizeof(pFeatureValuePointerList) / sizeof(pFeatureValuePointerList[0]));
  // Never let the hash initialisation write past the end of the scratch table.
  const int32_t kiHashListSize = (kiActualListSize < kiTableCapacity) ? kiActualListSize : kiTableCapacity;

  memset(pTimesOfFeatureValue, 0, sizeof(*pTimesOfFeatureValue)*kiActualListSize);
  (pFunc->pfCalculateBlockFeatureOfFrame[kiIs16x16])(pRefData, kiWidth, kiHeight, kiRefStride, pFeatureOfBlock, pTimesOfFeatureValue);

  //assign pLocationOfFeature pointer
  InitializeHashforFeature_c( pTimesOfFeatureValue, pBuf, kiHashListSize,
                              pLocationOfFeature, pFeatureValuePointerList );

  //assign each pixel's pLocationOfFeature
  FillQpelLocationByFeatureValue_c(pFeatureOfBlock, kiWidth, kiHeight, pFeatureValuePointerList);
}
// Runs the feature-search pre-processing for reference picture pRef:
// computes the block-feature tables via CalculateFeatureOfBlock and then marks the
// storage as ready by setting bRefBlockFeatureCalculated.
void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
{
CalculateFeatureOfBlock(pFunc, pRef, pScreenBlockFeatureStorage );
// flag is set unconditionally once the calculation call returns
pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = true;
}
//search related
void SetFeatureSearchIn( SWelsFuncPtrList *pFunc, const SWelsME& sMe,
const SSlice *pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
const int32_t kiEncStride, const int32_t kiRefStride,
SFeatureSearchIn* pFeatureSearchIn ) {
pFeatureSearchIn->pSad = pFunc->sSampleDealingFuncs.pfSampleSad[sMe.uiBlockSize];
//pFeatureSearchIn->iFeatureOfCurrent=
pFeatureSearchIn->iFeatureOfCurrent=pFunc->pfCalculateSingleBlockFeature[BLOCK_16x16==sMe.uiBlockSize](sMe.pEncMb, kiEncStride);
pFeatureSearchIn->pEnc = sMe.pEncMb;
pFeatureSearchIn->pColoRef = sMe.pColoRefMb;