Merge pull request #621 from sijchen/fme_merge42
[Encoder ME] add Preprocess functions for FME
This commit is contained in:
commit
646d943200
@ -42,11 +42,16 @@ namespace WelsSVCEnc {
|
||||
#define LIST_SIZE 0x10000 //(256*256)
|
||||
// Per-reference-picture lookup tables used by the feature-based motion search.
// CalculateFeatureOfBlock() reads the "Input" members and rebuilds the "Modify" members.
typedef struct TagScreenBlockFeatureStorage
|
||||
{
|
||||
//Input
|
||||
uint16_t* pFeatureOfBlockPointer; // buffer receiving one feature value per visited block position (row-major)
|
||||
int32_t iIs16x16; // block size selector: non-zero = 16x16, zero = 8x8; also used as index into the [2]-sized function tables
|
||||
uint8_t uiFeatureStrategyIndex;// index of hash strategy (CalculateFeatureOfBlock treats 0 as "sum of samples")
|
||||
|
||||
//Modify
|
||||
uint32_t* pTimesOfFeatureValue; // pTimesOfFeatureValue[v] = number of block positions whose feature value is v
|
||||
uint16_t** pLocationOfFeature; // uint16_t *pLocationOfFeature[LIST_SIZE]; pLocationOfFeature[i] points at the list of all locations (x,y) whose feature value is i
|
||||
uint16_t* pLocationPointer; // backing buffer that all pLocationOfFeature[i] lists point into (two uint16_t per location)
|
||||
int32_t iActualListSize; // number of feature values actually in use, i.e. valid entries of pTimesOfFeatureValue / pLocationOfFeature
|
||||
|
||||
uint32_t uiSadCostThreshold[BLOCK_SIZE_ALL];
|
||||
bool bRefBlockFeatureCalculated; // set to true by PerformFMEPreprocess() once the tables above have been built
|
||||
} SScreenBlockFeatureStorage; //should be stored with RefPic, one for each frame
|
||||
|
@ -193,6 +193,7 @@ bool CheckDirectionalMvFalse(PSampleSadSatdCostFunc pSad, void * vpMe,
|
||||
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
|
||||
int32_t& iBestSadCost);
|
||||
|
||||
// Cross Search Basics
|
||||
void LineFullSearch_c( void *pFunc, void *vpMe,
|
||||
uint16_t* pMvdTable, const int32_t kiFixedMvd,
|
||||
const int32_t kiEncStride, const int32_t kiRefStride,
|
||||
@ -205,6 +206,17 @@ void VerticalFullSearchUsingSSE41( void *pFunc, void *vpMe,
|
||||
const bool bVerticalSearch );
|
||||
void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SDqLayer* pCurLayer, SWelsME * pMe, const SSlice* pSlice);
|
||||
|
||||
// Feature Search Basics
|
||||
// Number of distinct feature values (largest possible value + 1) for each feature type.
#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1): sum of 256 8-bit samples
|
||||
#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1): sum of 64 8-bit samples
|
||||
#define LIST_SIZE_MSE_16x16 0x00878 //feature is (avg+mse)/2, max = (255+16*255)/2 = 0x877, plus one
|
||||
// Sum of the 8x8 samples whose top-left sample is pRef[0]; rows are kiRefStride samples apart.
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
|
||||
// Sum of the 16x16 samples whose top-left sample is pRef[0]; rows are kiRefStride samples apart.
int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
|
||||
// For every position (x,y) with 0 <= x < kiWidth and 0 <= y < kiHeight: stores the 8x8 sum of the block starting there
// in pFeatureOfBlock[y*kiWidth + x] and increments pTimesOfFeatureValue[sum].
void SumOf8x8BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
|
||||
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
|
||||
// Same as SumOf8x8BlockOfFrame_c, but using the 16x16 sum at every position.
void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
|
||||
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
|
||||
//inline functions
|
||||
inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY,
|
||||
const int32_t kiMaxMvRange,
|
||||
SMVUnitXY* pMvMin, SMVUnitXY* pMvMax)
|
||||
|
@ -146,6 +146,9 @@ typedef void (*PLineFullSearchFunc) ( void *pFunc, void *vpMe,
|
||||
const int32_t kiEncStride, const int32_t kiRefStride,
|
||||
const int32_t kiMinPos, const int32_t kiMaxPos,
|
||||
const bool bVerticalSearch );
|
||||
// Whole-frame feature calculator: fills pFeatureOfBlock with one feature value per block position of a
// kiWidth x kiHeight grid and counts the occurrences of every value in pTimesOfFeatureValue.
typedef void (*PCalculateBlockFeatureOfFrame)(uint8_t *pRef, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
|
||||
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
|
||||
// Single-block feature calculator: returns the feature value of the block whose top-left sample is pRef[0].
typedef int32_t (*PCalculateSingleBlockFeature)(uint8_t *pRef, const int32_t kiRefStride);
|
||||
|
||||
#define MAX_BLOCK_TYPE 5 // prev 7
|
||||
typedef struct TagSampleDealingFunc {
|
||||
@ -205,6 +208,8 @@ struct TagWelsFuncPointerList {
|
||||
PCalculateSatdFunc pfCalculateSatd;
|
||||
PCheckDirectionalMv pfCheckDirectionalMv;
|
||||
PLineFullSearchFunc pfLineFullSearch;
|
||||
PCalculateBlockFeatureOfFrame pfCalculateBlockFeatureOfFrame[2];//0 - for 8x8, 1 for 16x16
|
||||
PCalculateSingleBlockFeature pfCalculateSingleBlockFeature[2];//0 - for 8x8, 1 for 16x16
|
||||
|
||||
PCopyFunc pfCopy16x16Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
|
||||
PCopyFunc pfCopy16x16NotAligned; //md.c
|
||||
|
@ -39,6 +39,7 @@
|
||||
*/
|
||||
|
||||
#include "cpu_core.h"
|
||||
#include "ls_defines.h"
|
||||
#include "svc_motion_estimate.h"
|
||||
|
||||
namespace WelsSVCEnc {
|
||||
@ -67,6 +68,13 @@ void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScre
|
||||
pFuncList->pfLineFullSearch = LineFullSearch_c;
|
||||
if ( uiCpuFlag & WELS_CPU_SSE41 ) {
|
||||
}
|
||||
|
||||
//for feature search
|
||||
pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
|
||||
pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_c;
|
||||
//TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
|
||||
pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
|
||||
pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
|
||||
}
|
||||
}
|
||||
|
||||
@ -440,13 +448,144 @@ int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeature
|
||||
}
|
||||
return ENC_RETURN_UNEXPECTED;
|
||||
}
|
||||
|
||||
//preprocess related
|
||||
// Returns the sum of the 64 samples of the 8x8 block whose top-left sample is pRef[0].
// kiRefStride is the distance, in samples, between the starts of two consecutive rows.
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride)
{
  int32_t iTotal = 0;
  const uint8_t* pRow = pRef;
  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    for (int32_t iCol = 0; iCol < 8; ++iCol) {
      iTotal += pRow[iCol];
    }
    pRow += kiRefStride;
  }
  return iTotal;
}
|
||||
// Returns the sum of the 256 samples of the 16x16 block whose top-left sample is pRef[0].
// kiRefStride is the distance, in samples, between the starts of two consecutive rows.
int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride)
{
  int32_t iTotal = 0;
  const uint8_t* pRow = pRef;
  for (int32_t iRow = 0; iRow < 16; ++iRow) {
    for (int32_t iCol = 0; iCol < 16; ++iCol) {
      iTotal += pRow[iCol];
    }
    pRow += kiRefStride;
  }
  return iTotal;
}
|
||||
|
||||
void SumOf8x8BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
|
||||
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[])
|
||||
{
|
||||
int32_t x, y;
|
||||
uint8_t *pRef;
|
||||
uint16_t *pBuffer;
|
||||
int32_t iSum;
|
||||
for(y = 0; y < kiHeight; y++) {
|
||||
pRef = pRefPicture + kiRefStride * y;
|
||||
pBuffer = pFeatureOfBlock + kiWidth * y;
|
||||
for(x = 0; x < kiWidth; x++) {
|
||||
iSum = SumOf8x8SingleBlock_c(pRef + x, kiRefStride);
|
||||
|
||||
pBuffer[x] = iSum;
|
||||
pTimesOfFeatureValue[iSum]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
|
||||
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[])
|
||||
{//TODO: this is similar to SumOf8x8BlockOfFrame_c expect the calling of single block func, refactor-able?
|
||||
int32_t x, y;
|
||||
uint8_t *pRef;
|
||||
uint16_t *pBuffer;
|
||||
int32_t iSum;
|
||||
for(y = 0; y < kiHeight; y++) {
|
||||
pRef = pRefPicture + kiRefStride * y;
|
||||
pBuffer = pFeatureOfBlock + kiWidth * y;
|
||||
for(x = 0; x < kiWidth; x++) {
|
||||
iSum = SumOf16x16SingleBlock_c(pRef + x, kiRefStride);
|
||||
|
||||
pBuffer[x] = iSum;
|
||||
pTimesOfFeatureValue[iSum]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Partitions pBuf into kiListSize consecutive lists, one per feature value.
// List i starts where list i-1 ends and is 2 * pTimesOfFeatureValue[i] uint16_t long (two entries per location).
// The start of list i is stored both in pLocationOfFeature[i] and in pFeatureValuePointerList[i].
void InitializeHashforFeature_c( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
                                 uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList )
{
  uint16_t* pNextList = pBuf;
  int32_t iFeature = 0;
  while (iFeature < kiListSize) {
    pFeatureValuePointerList[iFeature] = pNextList;
    pLocationOfFeature[iFeature]       = pNextList;
    pNextList += (pTimesOfFeatureValue[iFeature] << 1);
    ++iFeature;
  }
}
|
||||
void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
|
||||
uint16_t** pFeatureValuePointerList )
|
||||
{
|
||||
//assign each pixel's position
|
||||
uint16_t* pSrcPointer = pFeatureOfBlock;
|
||||
int32_t iQpelY = 0;
|
||||
for(int32_t y = 0; y < kiHeight; y++)
|
||||
{
|
||||
for(int32_t x = 0; x < kiWidth; x++)
|
||||
{
|
||||
uint16_t uiFeature = pSrcPointer[x];
|
||||
ST32( &pFeatureValuePointerList[uiFeature][0], ((iQpelY<<16)|(x<<2)) );
|
||||
pFeatureValuePointerList[uiFeature] += 2;
|
||||
}
|
||||
iQpelY += 4;
|
||||
pSrcPointer += kiWidth;
|
||||
}
|
||||
}
|
||||
// Rebuilds the feature-search lookup tables of pScreenBlockFeatureStorage from the luma plane of pRef:
//   1. clears the per-feature-value counters, computes the feature of every visited block position and counts them;
//   2. partitions pLocationPointer into one location list per feature value (pLocationOfFeature);
//   3. writes the quarter-pel location of every visited position into the list of its feature value.
// pFunc supplies the whole-frame feature calculators; entry [iIs16x16] is used (0 = 8x8, 1 = 16x16).
void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef,
                              SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
{
  uint16_t*  pFeatureOfBlock      = pScreenBlockFeatureStorage->pFeatureOfBlockPointer;
  uint32_t*  pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue;
  uint16_t** pLocationOfFeature   = pScreenBlockFeatureStorage->pLocationOfFeature;
  uint16_t*  pBuf                 = pScreenBlockFeatureStorage->pLocationPointer;

  uint8_t* pRefData = pRef->pData[0];
  const int32_t kiRefStride = pRef->iLineSize[0];
  const int32_t kiIs16x16 = pScreenBlockFeatureStorage->iIs16x16;
  // Only positions whose block lies completely inside the picture are visited,
  // this is to save complexity of padding on pRef
  const int32_t kiEdgeDiscard = (kiIs16x16 ? 16 : 8);
  const int32_t kiWidth  = pRef->iWidthInPixel - kiEdgeDiscard;
  const int32_t kiHeight = pRef->iHeightInPixel - kiEdgeDiscard;
  const int32_t kiActualListSize = pScreenBlockFeatureStorage->iActualListSize;

  // Write cursors, one per feature value; they start at the head of each list and are advanced while filling.
  // NOTE(review): this is a very large automatic array (LIST_SIZE_SUM_16x16 = 0x0FF01 pointers); consider moving it
  // into SScreenBlockFeatureStorage or onto the heap. kiActualListSize must not exceed its size.
  uint16_t* pFeatureValuePointerList[WELS_MAX(LIST_SIZE_SUM_16x16,LIST_SIZE_MSE_16x16)] = {0};

  // size the clear from the array's own element type so it cannot drift from the declaration
  memset(pTimesOfFeatureValue, 0, sizeof(pTimesOfFeatureValue[0]) * kiActualListSize);
  (pFunc->pfCalculateBlockFeatureOfFrame[kiIs16x16])(pRefData, kiWidth, kiHeight, kiRefStride, pFeatureOfBlock, pTimesOfFeatureValue);

  //assign pLocationOfFeature pointer
  InitializeHashforFeature_c( pTimesOfFeatureValue, pBuf, kiActualListSize,
                              pLocationOfFeature, pFeatureValuePointerList );

  //assign each pixel's pLocationOfFeature
  FillQpelLocationByFeatureValue_c(pFeatureOfBlock, kiWidth, kiHeight, pFeatureValuePointerList);
}
|
||||
|
||||
// Runs the feature-search pre-processing for reference picture pRef and then flags the storage
// (bRefBlockFeatureCalculated) so that its tables are known to be built.
void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
                           SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
{
  SScreenBlockFeatureStorage* const pStorage = pScreenBlockFeatureStorage;

  CalculateFeatureOfBlock(pFunc, pRef, pStorage);
  pStorage->bRefBlockFeatureCalculated = true;
}
|
||||
|
||||
//search related
|
||||
void SetFeatureSearchIn( SWelsFuncPtrList *pFunc, const SWelsME& sMe,
|
||||
const SSlice *pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
|
||||
const int32_t kiEncStride, const int32_t kiRefStride,
|
||||
SFeatureSearchIn* pFeatureSearchIn ) {
|
||||
pFeatureSearchIn->pSad = pFunc->sSampleDealingFuncs.pfSampleSad[sMe.uiBlockSize];
|
||||
//pFeatureSearchIn->iFeatureOfCurrent=
|
||||
pFeatureSearchIn->iFeatureOfCurrent=pFunc->pfCalculateSingleBlockFeature[BLOCK_16x16==sMe.uiBlockSize](sMe.pEncMb, kiEncStride);
|
||||
|
||||
pFeatureSearchIn->pEnc = sMe.pEncMb;
|
||||
pFeatureSearchIn->pColoRef = sMe.pColoRefMb;
|
||||
|
Loading…
x
Reference in New Issue
Block a user