Merge pull request #2267 from shihuade/MultiThread_V4.2_SSliceCtx_SliceConSumeTime_Pull

Remove pSliceConsumeTime from SSliceCtx and SliceThreading (per-slice consume time is now stored in SSlice::uiSliceConsumeTime)
This commit is contained in:
HaiboZhu 2015-11-24 10:35:43 +08:00
commit 4c19823d44
8 changed files with 36 additions and 72 deletions

View File

@ -80,7 +80,6 @@ WELS_EVENT pThreadMasterEvent[MAX_THREADS_NUM]; // event
WELS_MUTEX mutexSliceNumUpdate; // for dynamic slicing mode MT
uint32_t* pSliceConsumeTime[MAX_DEPENDENCY_LAYER]; // consuming time for each slice, [iSpatialIdx][uiSliceIdx]
int32_t* pSliceComplexRatio[MAX_DEPENDENCY_LAYER]; // *INT_MULTIPLY
#ifdef MT_DEBUG

View File

@ -181,6 +181,7 @@ uint8_t uiReservedFillByte; // reserved to meet 4 bytes alignment
SCabacCtx sCabacCtx;
int32_t iCabacInitIdc;
int32_t iMbSkipRun;
uint32_t uiSliceConsumeTime;
} SSlice, *PSlice;
}

View File

@ -55,7 +55,7 @@ void UpdateMbListNeighborParallel (SDqLayer* pCurDq,
SMB* pMbList,
const int32_t kiSliceIdc);
void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceConsume);
void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq);
int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t kiSliceNum);

View File

@ -90,7 +90,6 @@ int32_t* pCountMbNumInSlice; /* count number of MBs in every
uint32_t uiSliceSizeConstraint; /* in byte */
int32_t iMaxSliceNumConstraint; /* maximal number of slices constraint */
uint32_t* pSliceConsumeTime;
} SSliceCtx;

View File

@ -3507,7 +3507,7 @@ int32_t GetSubSequenceId (sWelsEncCtx* pCtx, EVideoFrameType eFrameType) {
// writing parasets for (simulcast) svc
int32_t WriteSsvcParaset (sWelsEncCtx* pCtx, const int32_t kiSpatialNum,
SLayerBSInfo*& pLayerBsInfo, int32_t& iLayerNum, int32_t& iFrameSize) {
int32_t iNonVclSize = 0, iCountNal = 0, iReturn;
int32_t iNonVclSize = 0, iCountNal = 0, iReturn = 0;
iReturn = WelsWriteParameterSets (pCtx, &pLayerBsInfo->pNalLengthInByte[0], &iCountNal, &iNonVclSize);
WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
@ -4299,8 +4299,7 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo* pFbi, const SSour
&& pSvcParam->bUseLoadBalancing
&& pSvcParam->iMultipleThreadIdc > 1 &&
pSvcParam->iMultipleThreadIdc >= pParam->sSliceArgument.uiSliceNum) {
CalcSliceComplexRatio (pCtx->pSliceThreading->pSliceComplexRatio[iCurDid], pCtx->pCurDqLayer,
pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]);
CalcSliceComplexRatio (pCtx->pSliceThreading->pSliceComplexRatio[iCurDid], pCtx->pCurDqLayer);
#if defined(MT_DEBUG)
TrackSliceComplexities (pCtx, iCurDid);
#endif//#if defined(MT_DEBUG)
@ -4814,17 +4813,6 @@ int32_t DynSliceRealloc (sWelsEncCtx* pCtx,
pMA->WelsFree (pCurLayer->sSliceEncCtx.pCountMbNumInSlice, "pSliceSeg->pCountMbNumInSlice");
pCurLayer->sSliceEncCtx.pCountMbNumInSlice = pCountMbNumInSlice;
uint32_t* pSliceConsumeTime = (uint32_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (uint32_t),
"pSliceSeg->pSliceConsumeTime");
if (NULL == pSliceConsumeTime) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
"CWelsH264SVCEncoder::DynSliceRealloc: realloc pSliceConsumeTime not successful");
return ENC_RETURN_MEMALLOCERR;
}
memcpy (pSliceConsumeTime, pCurLayer->sSliceEncCtx.pSliceConsumeTime, sizeof (int32_t) * iMaxSliceNumOld);
pMA->WelsFree (pCurLayer->sSliceEncCtx.pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
pCurLayer->sSliceEncCtx.pSliceConsumeTime = pSliceConsumeTime;
//deal with rate control variables
const int32_t kiCurDid = pCtx->uiDependencyId;
SRCSlicing* pSlcingOverRc = (SRCSlicing*)pMA->WelsMalloc (iMaxSliceNum * sizeof (SRCSlicing), "SlicingOverRC");

View File

@ -128,23 +128,23 @@ void UpdateMbListNeighborParallel (SDqLayer* pCurDq,
} while (iIdx <= kiEndMbInSlice);
}
void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceConsume) {
void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq) {
SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx;
SSlice* pSliceInLayer = pCurDq->sLayerInfo.pSliceInLayer;
int32_t* pRatioList = (int32_t*)pRatio;
int32_t iAvI[MAX_SLICES_NUM];
int32_t iSumAv = 0;
uint32_t* pSliceTime = (uint32_t*)pSliceConsume;
int32_t* pCountMbInSlice = (int32_t*)pSliceCtx->pCountMbNumInSlice;
const int32_t kiSliceCount = pSliceCtx->iSliceNumInFrame;
int32_t iSliceIdx = 0;
int32_t iAvI[MAX_SLICES_NUM];
WelsEmms();
while (iSliceIdx < kiSliceCount) {
iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceTime[iSliceIdx]);
MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), pSliceConsumeTime[%d]= %d us, slice_run= %d",
iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceInLayer[iSliceIdx].uiSliceConsumeTime);
MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), uiSliceConsumeTime[%d]= %d us, slice_run= %d",
iSliceIdx,
pSliceTime[iSliceIdx], pCountMbInSlice[iSliceIdx]);
pSliceInLayer[iSliceIdx].uiSliceConsumeTime, pCountMbInSlice[iSliceIdx]);
iSumAv += iAvI[iSliceIdx];
++ iSliceIdx;
@ -154,8 +154,10 @@ void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceCons
}
}
int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t iSliceNum) {
uint32_t* pSliceConsume = (uint32_t*)pConsumeTime;
int32_t NeedDynamicAdjust (SSlice* pSliceInLayer, const int32_t iSliceNum) {
if ( NULL == pSliceInLayer )
return false;
uint32_t uiTotalConsume = 0;
int32_t iSliceIdx = 0;
int32_t iNeedAdj = false;
@ -163,7 +165,7 @@ int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t iSliceNum) {
WelsEmms();
while (iSliceIdx < iSliceNum) {
uiTotalConsume += pSliceConsume[iSliceIdx];
uiTotalConsume += pSliceInLayer[iSliceIdx].uiSliceConsumeTime;
iSliceIdx ++;
}
if (uiTotalConsume == 0) {
@ -178,7 +180,7 @@ int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t iSliceNum) {
float fRmse = .0f; // root mean square error of pSlice consume ratios
const float kfMeanRatio = 1.0f / iSliceNum;
do {
const float fRatio = 1.0f * pSliceConsume[iSliceIdx] / uiTotalConsume;
const float fRatio = 1.0f * pSliceInLayer[iSliceIdx].uiSliceConsumeTime / uiTotalConsume;
const float fDiffRatio = fRatio - kfMeanRatio;
fRmse += (fDiffRatio * fDiffRatio);
++ iSliceIdx;
@ -370,12 +372,9 @@ int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingPara
&& (pPara->bUseLoadBalancing)
&& (pPara->iMultipleThreadIdc > 1)
&& (pPara->iMultipleThreadIdc >= kiSliceNum)) {
pSmt->pSliceConsumeTime[iIdx] = (uint32_t*)pMa->WelsMallocz (kiSliceNum * sizeof (uint32_t), "pSliceConsumeTime[]");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceConsumeTime[iIdx]), FreeMemorySvc (ppCtx))
pSmt->pSliceComplexRatio[iIdx] = (int32_t*)pMa->WelsMalloc (kiSliceNum * sizeof (int32_t), "pSliceComplexRatio[]");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceComplexRatio[iIdx]), FreeMemorySvc (ppCtx))
} else {
pSmt->pSliceConsumeTime[iIdx] = NULL;
pSmt->pSliceComplexRatio[iIdx] = NULL;
}
@ -384,7 +383,6 @@ int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingPara
}
++ iIdx;
}
// NULL for pSliceConsumeTime[iIdx]: iIdx from iNumSpatialLayers to MAX_DEPENDENCY_LAYERS
#ifdef MT_DEBUG
// file handle for MT debug
@ -554,11 +552,7 @@ void ReleaseMtResource (sWelsEncCtx** ppCtx) {
iIdx = 0;
while (iIdx < pCodingParam->iSpatialLayerNum) {
if (pSmt->pSliceConsumeTime[iIdx]) {
pMa->WelsFree (pSmt->pSliceConsumeTime[iIdx], "pSliceConsumeTime[]");
pSmt->pSliceConsumeTime[iIdx] = NULL;
}
if (pSmt->pSliceComplexRatio[iIdx] != NULL) {
if (pSmt->pSliceComplexRatio[iIdx] != NULL) {
pMa->WelsFree (pSmt->pSliceComplexRatio[iIdx], "pSliceComplexRatio[]");
pSmt->pSliceComplexRatio[iIdx] = NULL;
}
@ -814,12 +808,12 @@ WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc (void* arg) {
pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, iSliceIdx);
if (bDsaFlag) {
pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx] = (uint32_t) (
pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime = (uint32_t) (
WelsTime() - iSliceStart);
MT_TRACE_LOG (& (pEncPEncCtx->sLogCtx), WELS_LOG_INFO,
"[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
"[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, uiSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
pEncPEncCtx->iCodingIndex, iSliceIdx,
pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx], iSliceSize,
pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime, iSliceSize,
pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice,
pCurDq->sSliceEncCtx.pCountMbNumInSlice[iSliceIdx]);
}
@ -1067,10 +1061,9 @@ int32_t AdjustBaseLayer (sWelsEncCtx* pCtx) {
#endif//MT_DEBUG
pCtx->pCurDqLayer = pCurDq;
memcpy ((pCtx->pSliceThreading->pSliceConsumeTime[0]), (pCurDq->sSliceEncCtx.pSliceConsumeTime),
pCurDq->sSliceEncCtx.iSliceNumInFrame * sizeof (uint32_t));
// no adjustment is needed when the slices' consumed times do not differ
iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[0], pCurDq->sSliceEncCtx.iSliceNumInFrame);
iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[0]->sLayerInfo.pSliceInLayer, pCurDq->sSliceEncCtx.iSliceNumInFrame);
if (iNeedAdj)
DynamicAdjustSlicing (pCtx,
pCurDq,
@ -1100,12 +1093,10 @@ int32_t AdjustEnhanceLayer (sWelsEncCtx* pCtx, int32_t iCurDid) {
&& (pCtx->pSvcParam->sSpatialLayers[iCurDid - 1].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE
&& pCtx->pSvcParam->iMultipleThreadIdc >= pCtx->pSvcParam->sSpatialLayers[iCurDid -
1].sSliceArgument.uiSliceNum);
memcpy ((pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]), (pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime),
pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame * sizeof (uint32_t));
if (kbModelingFromSpatial) { // using spatial base layer for complexity estimation
// no adjustment is needed when the slices' consumed times do not differ
iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[iCurDid - 1],
iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid - 1]->sLayerInfo.pSliceInLayer,
pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame);
if (iNeedAdj)
DynamicAdjustSlicing (pCtx,
@ -1115,7 +1106,7 @@ int32_t AdjustEnhanceLayer (sWelsEncCtx* pCtx, int32_t iCurDid) {
);
} else { // use temporal layer for complexity estimation
// no adjustment is needed when the slices' consumed times do not differ
iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[iCurDid],
iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid]->sLayerInfo.pSliceInLayer,
pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame);
if (iNeedAdj)
DynamicAdjustSlicing (pCtx,
@ -1163,11 +1154,11 @@ void TrackSliceConsumeTime (sWelsEncCtx* pCtx, int32_t* pDidList, const int32_t
pPara = pCtx->pSvcParam;
while (iSpatialIdx < iSpatialNum) {
const int32_t kiDid = pDidList[iSpatialIdx];
SSpatialLayerInternal* pDlp = &pPara->sDependencyLayers[kiDid];
SSliceConfig* pSliceArgument = &pDlp->sSliceArgument;
SDqLayer* pCurDq = pCtx->ppDqLayerList[kiDid];
SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx;
const int32_t kiDid = pDidList[iSpatialIdx];
SSliceConfig* pSliceArgument = &pPara->sDependencyLayers[kiDid].sSliceArgument;
SDqLayer* pCurDq = pCtx->ppDqLayerList[kiDid];
SSlice* pSliceInLayer = pCurDq->sLayerInfo.pSliceInLayer;
SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx;
const uint32_t kuiCountSliceNum = pSliceCtx->iSliceNumInFrame;
if (pCtx->pSliceThreading) {
if (pCtx->pSliceThreading->pFSliceDiff
@ -1178,11 +1169,11 @@ void TrackSliceConsumeTime (sWelsEncCtx* pCtx, int32_t* pDidList, const int32_t
uint32_t uiMaxT = 0;
int32_t iMaxI = 0;
while (i < kuiCountSliceNum) {
if (pCtx->pSliceThreading->pSliceConsumeTime[kiDid] != NULL)
if (pSliceInLayer[i] != NULL)
fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time coding_idx %d iDid %d pSlice %d\n",
pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i], pCtx->iCodingIndex, kiDid, i /*/ 1000*/);
if (pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i] > uiMaxT) {
uiMaxT = pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i];
pSliceInLayer[i].uiSliceConsumeTime, pCtx->iCodingIndex, kiDid, i /*/ 1000*/);
if (pSliceInLayer[i].uiSliceConsumeTime > uiMaxT) {
uiMaxT = pSliceInLayer[i].uiSliceConsumeTime;
iMaxI = i;
}
++ i;

View File

@ -388,11 +388,7 @@ int32_t InitSliceSegment (SDqLayer* pCurDq,
pSliceSeg->pCountMbNumInSlice = NULL;
}
if (NULL != pSliceSeg->pSliceConsumeTime) {
pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
pSliceSeg->pSliceConsumeTime = NULL;
}
// just to be safe
pSliceSeg->iSliceNumInFrame = 0;
pSliceSeg->iMbNumInFrame = 0;
@ -411,7 +407,6 @@ int32_t InitSliceSegment (SDqLayer* pCurDq,
"pSliceSeg->pCountMbNumInSlice");
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice)
pSliceSeg->pSliceConsumeTime = NULL;
pSliceSeg->uiSliceMode = uiSliceMode;
pSliceSeg->iMbWidth = kiMbWidth;
pSliceSeg->iMbHeight = kiMbHeight;
@ -438,10 +433,6 @@ int32_t InitSliceSegment (SDqLayer* pCurDq,
"pSliceSeg->pCountMbNumInSlice");
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice)
pSliceSeg->pSliceConsumeTime = (uint32_t*)pMa->WelsMalloc (pSliceSeg->iSliceNumInFrame * sizeof (uint32_t),
"pSliceSeg->pSliceConsumeTime");
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pSliceConsumeTime)
pSliceSeg->uiSliceMode = pSliceArgument->uiSliceMode;
pSliceSeg->iMbWidth = kiMbWidth;
@ -489,11 +480,6 @@ void UninitSliceSegment (SDqLayer* pCurDq, CMemoryAlign* pMa) {
pSliceSeg->pCountMbNumInSlice = NULL;
}
if (NULL != pSliceSeg->pSliceConsumeTime) {
pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
pSliceSeg->pSliceConsumeTime = NULL;
}
pSliceSeg->iMbNumInFrame = 0;
pSliceSeg->iMbWidth = 0;

View File

@ -216,12 +216,12 @@ WelsErrorType CWelsLoadBalancingSlicingEncodingTask::InitTask() {
void CWelsLoadBalancingSlicingEncodingTask::FinishTask() {
CWelsSliceEncodingTask::FinishTask();
m_pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime[m_iSliceIdx] = (uint32_t) (WelsTime() - m_iSliceStart);
m_pSlice->uiSliceConsumeTime = (uint32_t) (WelsTime() - m_iSliceStart);
WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG,
"[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
"[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, uiSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
m_pCtx->iCodingIndex,
m_iSliceIdx,
m_pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime[m_iSliceIdx],
m_pSlice->uiSliceConsumeTime,
m_iSliceSize,
m_pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[m_iSliceIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice,
m_pCtx->pCurDqLayer->sSliceEncCtx.pCountMbNumInSlice[m_iSliceIdx]);