Merge pull request #1490 from sijchen/after_review

[Encoder] Fixing for large number of slices
This commit is contained in:
ruil2 2014-11-04 15:12:38 +08:00
commit 8ed4e83e93
8 changed files with 87 additions and 32 deletions

View File

@ -42,6 +42,7 @@
#include <math.h>
#include <assert.h>
#include <string.h>
#include "typedefs.h"
@ -273,8 +274,36 @@ static inline bool WELS_POWER2_IF (uint32_t v) {
#endif
inline bool CheckInRangeCloseOpen (const int16_t kiCurrent, const int16_t kiMin, const int16_t kiMax) {
return ((kiCurrent >= kiMin) && (kiCurrent < kiMax));
return ((kiCurrent >= kiMin) && (kiCurrent < kiMax));
}
static inline void WelsSetMemUint32_c (uint32_t* pDst, uint32_t iValue, int32_t iSizeOfData) {
for (int i = 0; i < iSizeOfData; i++) {
pDst[i] = iValue;
}
}
static inline void WelsSetMemUint16_c (uint16_t* pDst, uint16_t iValue, int32_t iSizeOfData) {
for (int i = 0; i < iSizeOfData; i++) {
pDst[i] = iValue;
}
}
inline void WelsSetMemMultiplebytes_c (void* pDst, uint32_t iValue, int32_t iSizeOfData, int32_t iDataLengthOfData) {
assert (4 == iDataLengthOfData || 2 == iDataLengthOfData || 1 == iDataLengthOfData);
// TODO: consider add assembly for these functions
if (0 != iValue) {
if (4 == iDataLengthOfData) {
WelsSetMemUint32_c (static_cast<uint32_t*> (pDst), static_cast<uint32_t> (iValue), iSizeOfData);
} else if (2 == iDataLengthOfData) {
WelsSetMemUint16_c (static_cast<uint16_t*> (pDst), static_cast<uint16_t> (iValue), iSizeOfData);
} else {
memset (static_cast<uint8_t*> (pDst), static_cast<uint8_t> (iValue), iSizeOfData);
}
} else {
memset (static_cast<uint8_t*> (pDst), 0, iSizeOfData * iDataLengthOfData);
}
}
#endif//WELS_MACRO_UTILIZATIONS_H__

View File

@ -68,12 +68,12 @@ SMVUnitXY sP16x16Mv;
uint8_t uiLumaQp; // uiLumaQp: pPps->iInitialQp + sSliceHeader->delta_qp + mb->dquant.
uint8_t uiChromaQp;
uint8_t uiSliceIdc; // AVC: pFirstMbInSlice?; SVC: (pFirstMbInSlice << 7) | ((uiDependencyId << 4) | uiQualityId);
uint16_t uiSliceIdc; // 2^16=65536 > MaxFS(36864) of level 5.1; AVC: pFirstMbInSlice?; SVC: (pFirstMbInSlice << 7) | ((uiDependencyId << 4) | uiQualityId);
uint32_t uiChromPredMode;
int32_t iLumaDQp;
SMVUnitXY sMvd[4];
int32_t iCbpDc;
uint8_t reserved_filling_bytes[1]; // filling bytes reserved to make structure aligned with 4 bytes, higher cache hit on less structure size by 2 cache lines( 2 * 64 bytes) once hit
//uint8_t reserved_filling_bytes[1]; // not deleting this line for further changes of this structure. filling bytes reserved to make structure aligned with 4 bytes, higher cache hit on less structure size by 2 cache lines( 2 * 64 bytes) once hit
} SMB, *PMb;
}

View File

@ -80,7 +80,7 @@ int16_t iMbWidth; /* width of picture size in mb */
int16_t iMbHeight; /* height of picture size in mb */
int16_t iSliceNumInFrame; /* count number of slices in frame; */
int32_t iMbNumInFrame; /* count number of MBs in frame */
uint8_t* pOverallMbMap; /* overall MB map in frame, store virtual slice idc; */
uint16_t* pOverallMbMap; /* overall MB map in frame, store virtual slice idc; */
int16_t* pFirstMbInSlice; /* first MB address top-left based in every slice respectively; */
int32_t* pCountMbNumInSlice; /* count number of MBs in every slice respectively; */
uint32_t uiSliceSizeConstraint;/*in byte*/
@ -140,7 +140,7 @@ void UninitSlicePEncCtx (SSliceCtx* pSliceCtx, CMemoryAlign* pMa);
*
* \return uiSliceIdc - successful; (uint8_t)(-1) - failed;
*/
uint8_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY);
uint16_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY);
/*!
* \brief Get first mb in slice/slice_group: uiSliceIdc (apply in Single/multiple slices and FMO)

View File

@ -597,7 +597,7 @@ static void InitMbInfo (sWelsEncCtx* pEnc, SMB* pList, SDqLayer* pLayer, con
bool bLeftTop;
bool bRightTop;
int32_t iLeftXY, iTopXY, iLeftTopXY, iRightTopXY;
uint8_t uiSliceIdc;
uint16_t uiSliceIdc;
pList[iIdx].iMbX = pEnc->pStrideTab->pMbIndexX[kiDlayerId][iIdx];
pList[iIdx].iMbY = pEnc->pStrideTab->pMbIndexY[kiDlayerId][iIdx];
@ -2272,7 +2272,7 @@ void DynslcUpdateMbNeighbourInfoListForAllSlices (SSliceCtx* pSliceCtx, SMB* pMb
bool bTop;
bool bLeftTop;
bool bRightTop;
int32_t uiSliceIdc;
uint16_t uiSliceIdc;
int32_t iLeftXY, iTopXY, iLeftTopXY, iRightTopXY;
uiSliceIdc = WelsMbToSliceIdc (pSliceCtx, kiMbXY);
@ -2347,7 +2347,8 @@ void UpdateSlicepEncCtxWithPartition (SSliceCtx* pSliceCtx, int32_t iPartitionNu
}
pSliceCtx->pFirstMbInSlice[i] = iFirstMbIdx;
memset (pSliceCtx->pOverallMbMap + iFirstMbIdx, (uint8_t)i, pSliceCtx->pCountMbNumInSlice[i]*sizeof (uint8_t));
WelsSetMemMultiplebytes_c(pSliceCtx->pOverallMbMap + iFirstMbIdx, i,
pSliceCtx->pCountMbNumInSlice[i], sizeof(uint16_t));
// for next partition(or pSlice)
iFirstMbIdx += pSliceCtx->pCountMbNumInSlice[i];
@ -3924,15 +3925,19 @@ int32_t DynSliceRealloc (sWelsEncCtx* pCtx,
iMaxSliceNum *= SLICE_NUM_EXPAND_COEF;
SWelsNalRaw* pNalList = (SWelsNalRaw*)pMA->WelsMalloc (iCountNals * sizeof (SWelsNalRaw), "pOut->sNalList");
if (NULL == pNalList)
if (NULL == pNalList) {
WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pNalList is NULL");
return ENC_RETURN_MEMALLOCERR;
}
memcpy (pNalList, pCtx->pOut->sNalList, sizeof (SWelsNalRaw) * pCtx->pOut->iCountNals);
pMA->WelsFree (pCtx->pOut->sNalList, "pOut->sNalList");
pCtx->pOut->sNalList = pNalList;
int32_t* pNalLen = (int32_t*)pMA->WelsMalloc (iCountNals * sizeof (int32_t), "pOut->pNalLen");
if (NULL == pNalLen)
if (NULL == pNalLen) {
WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pNalLen is NULL");
return ENC_RETURN_MEMALLOCERR;
}
memcpy (pNalLen, pCtx->pOut->pNalLen, sizeof (int32_t) * pCtx->pOut->iCountNals);
pMA->WelsFree (pCtx->pOut->pNalLen, "pOut->pNalLen");
pCtx->pOut->pNalLen = pNalLen;
@ -3948,8 +3953,10 @@ int32_t DynSliceRealloc (sWelsEncCtx* pCtx,
}
SSlice* pSlice = (SSlice*)pMA->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "Slice");
if (NULL == pSlice)
if (NULL == pSlice) {
WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pSlice is NULL");
return ENC_RETURN_MEMALLOCERR;
}
memcpy (pSlice, pCurLayer->sLayerInfo.pSliceInLayer, sizeof (SSlice) * iMaxSliceNumOld);
int32_t uiSliceIdx;
uiSliceIdx = iMaxSliceNumOld;
@ -3963,8 +3970,11 @@ int32_t DynSliceRealloc (sWelsEncCtx* pCtx,
pSliceIdx->pSliceBsa = &pCtx->pSliceBs[uiSliceIdx].sBsWrite;
else
pSliceIdx->pSliceBsa = &pCtx->pOut->sBsWrite;
if (AllocMbCacheAligned (&pSliceIdx->sMbCacheInfo, pMA))
if (AllocMbCacheAligned (&pSliceIdx->sMbCacheInfo, pMA)) {
WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: realloc MbCache not successful at slice_idx=%d (max-slice=%d)",
uiSliceIdx, iMaxSliceNum);
return ENC_RETURN_MEMALLOCERR;
}
pSliceIdx->bSliceHeaderExtFlag = pBaseSlice->bSliceHeaderExtFlag;
pSHExt->sSliceHeader.iPpsId = pBaseSHExt->sSliceHeader.iPpsId;
@ -3983,8 +3993,10 @@ int32_t DynSliceRealloc (sWelsEncCtx* pCtx,
pCurLayer->sLayerInfo.pSliceInLayer = pSlice;
int16_t* pFirstMbInSlice = (int16_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (int16_t), "pSliceSeg->pFirstMbInSlice");
if (NULL == pFirstMbInSlice)
if (NULL == pFirstMbInSlice) {
WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pFirstMbInSlice is NULL");
return ENC_RETURN_MEMALLOCERR;
}
memset (pFirstMbInSlice, 0, sizeof (int16_t) * iMaxSliceNum);
memcpy (pFirstMbInSlice, pCurLayer->pSliceEncCtx->pFirstMbInSlice, sizeof (int16_t) * iMaxSliceNumOld);
pMA->WelsFree (pCurLayer->pSliceEncCtx->pFirstMbInSlice, "pSliceSeg->pFirstMbInSlice");
@ -3992,8 +4004,10 @@ int32_t DynSliceRealloc (sWelsEncCtx* pCtx,
int32_t* pCountMbNumInSlice = (int32_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (int32_t),
"pSliceSeg->pCountMbNumInSlice");
if (NULL == pCountMbNumInSlice)
if (NULL == pCountMbNumInSlice) {
WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: realloc pCountMbNumInSlice not successful");
return ENC_RETURN_MEMALLOCERR;
}
memcpy (pCountMbNumInSlice, pCurLayer->pSliceEncCtx->pCountMbNumInSlice, sizeof (int32_t) * iMaxSliceNumOld);
uiSliceIdx = iMaxSliceNumOld;
while (uiSliceIdx < iMaxSliceNum) {
@ -4004,8 +4018,10 @@ int32_t DynSliceRealloc (sWelsEncCtx* pCtx,
pCurLayer->pSliceEncCtx->pCountMbNumInSlice = pCountMbNumInSlice;
SRCSlicing* pSlcingOverRc = (SRCSlicing*)pMA->WelsMalloc (iMaxSliceNum * sizeof (SRCSlicing), "SlicingOverRC");
if (NULL == pSlcingOverRc)
if (NULL == pSlcingOverRc) {
WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: realloc pSlcingOverRc not successful");
return ENC_RETURN_MEMALLOCERR;
}
memcpy (pSlcingOverRc, pCtx->pWelsSvcRc->pSlicingOverRc, sizeof (SRCSlicing) * iMaxSliceNumOld);
uiSliceIdx = iMaxSliceNumOld;
SRCSlicing* pSORC = &pSlcingOverRc[uiSliceIdx];
@ -4069,9 +4085,13 @@ int32_t WelsCodeOnePicPartition (sWelsEncCtx* pCtx,
if (iSliceIdx >= (pSliceCtx->iMaxSliceNumConstraint - kiSliceIdxStep)) { // insufficient memory in pSliceInLayer[]
if (pCtx->iActiveThreadsNum == 1) {
if (DynSliceRealloc (pCtx, pFrameBSInfo, pLayerBsInfo)) //only single thread support re-alloc now
//only single thread support re-alloc now
if (DynSliceRealloc (pCtx, pFrameBSInfo, pLayerBsInfo)) {
WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::WelsCodeOnePicPartition: DynSliceRealloc not successful");
return ENC_RETURN_MEMALLOCERR;
}
} else if (iSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) {
WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::WelsCodeOnePicPartition: iSliceIdx(%d) over iMaxSliceNumConstraint(%d)", iSliceIdx, pSliceCtx->iMaxSliceNumConstraint);
return ENC_RETURN_MEMALLOCERR;
}
}

View File

@ -73,7 +73,7 @@ namespace WelsEnc {
void UpdateMbListNeighborParallel (SSliceCtx* pSliceCtx,
SMB* pMbList,
const int32_t uiSliceIdc) {
const uint8_t* kpMbMap = pSliceCtx->pOverallMbMap;
const uint16_t* kpMbMap = pSliceCtx->pOverallMbMap;
const int32_t kiMbWidth = pSliceCtx->iMbWidth;
int32_t iIdx = pSliceCtx->pFirstMbInSlice[uiSliceIdc];
const int32_t kiEndMbInSlice = iIdx + pSliceCtx->pCountMbNumInSlice[uiSliceIdc] - 1;

View File

@ -78,7 +78,8 @@ int32_t AssignMbMapMultipleSlices (SSliceCtx* pSliceSeg, const SSliceConfig* kpM
const int16_t kiFirstMb = uiSliceIdx * kiMbWidth;
pSliceSeg->pCountMbNumInSlice[uiSliceIdx] = kiMbWidth;
pSliceSeg->pFirstMbInSlice[uiSliceIdx] = kiFirstMb;
memset (pSliceSeg->pOverallMbMap + kiFirstMb, (uint8_t)uiSliceIdx, kiMbWidth * sizeof (uint8_t));
WelsSetMemMultiplebytes_c(pSliceSeg->pOverallMbMap + kiFirstMb, uiSliceIdx,
kiMbWidth, sizeof(uint16_t));
++ uiSliceIdx;
}
@ -89,7 +90,7 @@ int32_t AssignMbMapMultipleSlices (SSliceCtx* pSliceSeg, const SSliceConfig* kpM
const int32_t* kpSlicesAssignList = (int32_t*) & (kpMso->sSliceArgument.uiSliceMbNum[0]);
const int32_t kiCountNumMbInFrame = pSliceSeg->iMbNumInFrame;
const int32_t kiCountSliceNumInFrame = pSliceSeg->iSliceNumInFrame;
int32_t iSliceIdx = 0;
uint16_t iSliceIdx = 0;
int16_t iMbIdx = 0;
do {
@ -392,7 +393,7 @@ int32_t InitSliceSegment (SSliceCtx* pSliceSeg,
}
if (SM_SINGLE_SLICE == uiSliceMode) {
pSliceSeg->pOverallMbMap = (uint8_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint8_t), "pSliceSeg->pOverallMbMap");
pSliceSeg->pOverallMbMap = (uint16_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint16_t), "pSliceSeg->pOverallMbMap");
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pOverallMbMap)
pSliceSeg->iSliceNumInFrame = 1;
@ -419,11 +420,12 @@ int32_t InitSliceSegment (SSliceCtx* pSliceSeg,
&& uiSliceMode != SM_DYN_SLICE && uiSliceMode != SM_AUTO_SLICE)
return 1;
pSliceSeg->pOverallMbMap = (uint8_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint8_t), "pSliceSeg->pOverallMbMap");
pSliceSeg->pOverallMbMap = (uint16_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint16_t), "pSliceSeg->pOverallMbMap");
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pOverallMbMap)
memset (pSliceSeg->pOverallMbMap, 0, kiCountMbNum * sizeof (uint8_t));
WelsSetMemMultiplebytes_c(pSliceSeg->pOverallMbMap, 0, kiCountMbNum, sizeof(uint16_t));
//SM_DYN_SLICE: init, set pSliceSeg->iSliceNumInFrame = 1;
pSliceSeg->iSliceNumInFrame = GetInitialSliceNum (kiMbWidth, kiMbHeight, pMso);
@ -550,10 +552,10 @@ void UninitSlicePEncCtx (SSliceCtx* pSliceCtx, CMemoryAlign* pMa) {
*
* \return uiSliceIdc - successful; -1 - failed;
*/
uint8_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY) {
uint16_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY) {
if (NULL != pSliceCtx && kiMbXY < pSliceCtx->iMbNumInFrame && kiMbXY >= 0)
return pSliceCtx->pOverallMbMap[ kiMbXY ];
return (uint8_t) (-1);
return (uint16_t) (-1);
}
/*!
@ -688,7 +690,8 @@ int32_t DynamicAdjustSlicePEncCtxAll (SSliceCtx* pSliceCtx,
pSliceCtx->pFirstMbInSlice[iSliceIdx] = iFirstMbIdx;
pSliceCtx->pCountMbNumInSlice[iSliceIdx] = kiSliceRun;
memset (pSliceCtx->pOverallMbMap + iFirstMbIdx, (uint8_t)iSliceIdx, kiSliceRun * sizeof (uint8_t));
WelsSetMemMultiplebytes_c(pSliceCtx->pOverallMbMap + iFirstMbIdx, iSliceIdx,
kiSliceRun, sizeof(uint16_t));
iFirstMbIdx += kiSliceRun;

View File

@ -776,7 +776,7 @@ void UpdateMbNeighbourInfoForNextSlice (SSliceCtx* pSliceCtx,
bool bLeftTop;
bool bRightTop;
int32_t iLeftXY, iTopXY, iLeftTopXY, iRightTopXY;
const uint8_t kuiSliceIdc = WelsMbToSliceIdc (pSliceCtx, kiMbXY);
const uint16_t kuiSliceIdc = WelsMbToSliceIdc (pSliceCtx, kiMbXY);
pMb->uiSliceIdc = kuiSliceIdc;
iLeftXY = kiMbXY - 1;
@ -814,9 +814,9 @@ void AddSliceBoundary (sWelsEncCtx* pEncCtx, SSlice* pCurSlice, SSliceCtx* pSlic
int32_t iFirstMbIdxOfNextSlice, const int32_t kiLastMbIdxInPartition) {
SDqLayer* pCurLayer = pEncCtx->pCurDqLayer;
int32_t iCurMbIdx = pCurMb->iMbXY;
int32_t iCurSliceIdc = pSliceCtx->pOverallMbMap[ iCurMbIdx ];
uint16_t iCurSliceIdc = pSliceCtx->pOverallMbMap[ iCurMbIdx ];
const int32_t kiSliceIdxStep = pEncCtx->iActiveThreadsNum;
int32_t iNextSliceIdc = iCurSliceIdc + kiSliceIdxStep;
uint16_t iNextSliceIdc = iCurSliceIdc + kiSliceIdxStep;
SSlice* pNextSlice = NULL;
SMB* pMbList = pCurLayer->sMbDataP;
@ -839,9 +839,8 @@ void AddSliceBoundary (sWelsEncCtx* pEncCtx, SSlice* pCurSlice, SSliceCtx* pSlic
sizeof (SSliceHeaderExt)); // confirmed_safe_unsafe_usage
pSliceCtx->pFirstMbInSlice[iNextSliceIdc] = iFirstMbIdxOfNextSlice;
memset (pSliceCtx->pOverallMbMap + iFirstMbIdxOfNextSlice, (uint8_t)iNextSliceIdc,
(kiLastMbIdxInPartition - iFirstMbIdxOfNextSlice + 1)*sizeof (uint8_t));
WelsSetMemMultiplebytes_c (pSliceCtx->pOverallMbMap + iFirstMbIdxOfNextSlice, iNextSliceIdc,
(kiLastMbIdxInPartition - iFirstMbIdxOfNextSlice + 1), sizeof(uint16_t));
//DYNAMIC_SLICING_ONE_THREAD: update pMbList slice_neighbor_info
UpdateMbNeighbourInfoForNextSlice (pSliceCtx, pMbList, iFirstMbIdxOfNextSlice, kiLastMbIdxInPartition);
@ -875,6 +874,10 @@ bool DynSlcJudgeSliceBoundaryStepBack (void* pCtx, void* pSlice, SSliceCtx* pSli
&& JUMPPACKETSIZE_JUDGE (uiLen, iCurMbIdx, pSliceCtx->uiSliceSizeConstraint)) /*jump_avoiding_pack_exceed*/
&& kbCurMbNotLastMbOfCurPartition) { //decide to add new pSlice
WelsLog (&pEncCtx->sLogCtx, WELS_LOG_DETAIL,
"DynSlcJudgeSliceBoundaryStepBack: AddSliceBoundary: iCurMbIdx=%d, uiLen=%d, uiSliceIdx=%d", iCurMbIdx, uiLen,
pCurSlice->uiSliceIdx);
if (pEncCtx->pSvcParam->iMultipleThreadIdc > 1) {
WelsMutexLock (&pEncCtx->pSliceThreading->mutexSliceNumUpdate);
//lock the acessing to this variable: pSliceCtx->iSliceNumInFrame

View File

@ -718,7 +718,7 @@ int32_t RequestScreenBlockFeatureStorage (CMemoryAlign* pMa, const int32_t kiFra
pScreenBlockFeatureStorage->iIs16x16 = !bIsBlock8x8;
pScreenBlockFeatureStorage->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
pScreenBlockFeatureStorage->iActualListSize = kiListSize;
memset (pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL * sizeof (uint32_t));
WelsSetMemMultiplebytes_c (pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL, sizeof(uint32_t));
pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = false;
return ENC_RETURN_SUCCESS;