diff --git a/codec/encoder/core/inc/mt_defs.h b/codec/encoder/core/inc/mt_defs.h index de9163e4..d49d5ad3 100644 --- a/codec/encoder/core/inc/mt_defs.h +++ b/codec/encoder/core/inc/mt_defs.h @@ -80,7 +80,6 @@ WELS_EVENT pThreadMasterEvent[MAX_THREADS_NUM]; // event WELS_MUTEX mutexSliceNumUpdate; // for dynamic slicing mode MT -uint32_t* pSliceConsumeTime[MAX_DEPENDENCY_LAYER]; // consuming time for each slice, [iSpatialIdx][uiSliceIdx] int32_t* pSliceComplexRatio[MAX_DEPENDENCY_LAYER]; // *INT_MULTIPLY #ifdef MT_DEBUG diff --git a/codec/encoder/core/inc/slice.h b/codec/encoder/core/inc/slice.h index fe0b8ab3..55e8ee1d 100644 --- a/codec/encoder/core/inc/slice.h +++ b/codec/encoder/core/inc/slice.h @@ -181,6 +181,7 @@ uint8_t uiReservedFillByte; // reserved to meet 4 bytes alignment SCabacCtx sCabacCtx; int32_t iCabacInitIdc; int32_t iMbSkipRun; +uint32_t uiSliceConsumeTime; } SSlice, *PSlice; } diff --git a/codec/encoder/core/inc/slice_multi_threading.h b/codec/encoder/core/inc/slice_multi_threading.h index 5efb52c2..67838f29 100644 --- a/codec/encoder/core/inc/slice_multi_threading.h +++ b/codec/encoder/core/inc/slice_multi_threading.h @@ -55,7 +55,7 @@ void UpdateMbListNeighborParallel (SDqLayer* pCurDq, SMB* pMbList, const int32_t kiSliceIdc); -void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceConsume); +void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq); int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t kiSliceNum); diff --git a/codec/encoder/core/inc/svc_enc_slice_segment.h b/codec/encoder/core/inc/svc_enc_slice_segment.h index c13e2af8..e3b2bb40 100644 --- a/codec/encoder/core/inc/svc_enc_slice_segment.h +++ b/codec/encoder/core/inc/svc_enc_slice_segment.h @@ -90,7 +90,6 @@ int32_t* pCountMbNumInSlice; /* count number of MBs in every uint32_t uiSliceSizeConstraint; /* in byte */ int32_t iMaxSliceNumConstraint; /* maximal number of slices constraint */ -uint32_t* pSliceConsumeTime; } SSliceCtx; diff --git 
a/codec/encoder/core/src/encoder_ext.cpp b/codec/encoder/core/src/encoder_ext.cpp index 187585fa..21e6ec16 100644 --- a/codec/encoder/core/src/encoder_ext.cpp +++ b/codec/encoder/core/src/encoder_ext.cpp @@ -3507,7 +3507,7 @@ int32_t GetSubSequenceId (sWelsEncCtx* pCtx, EVideoFrameType eFrameType) { // writing parasets for (simulcast) svc int32_t WriteSsvcParaset (sWelsEncCtx* pCtx, const int32_t kiSpatialNum, SLayerBSInfo*& pLayerBsInfo, int32_t& iLayerNum, int32_t& iFrameSize) { - int32_t iNonVclSize = 0, iCountNal = 0, iReturn; + int32_t iNonVclSize = 0, iCountNal = 0, iReturn = 0; iReturn = WelsWriteParameterSets (pCtx, &pLayerBsInfo->pNalLengthInByte[0], &iCountNal, &iNonVclSize); WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS) @@ -4299,8 +4299,7 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo* pFbi, const SSour && pSvcParam->bUseLoadBalancing && pSvcParam->iMultipleThreadIdc > 1 && pSvcParam->iMultipleThreadIdc >= pParam->sSliceArgument.uiSliceNum) { - CalcSliceComplexRatio (pCtx->pSliceThreading->pSliceComplexRatio[iCurDid], pCtx->pCurDqLayer, - pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]); + CalcSliceComplexRatio (pCtx->pSliceThreading->pSliceComplexRatio[iCurDid], pCtx->pCurDqLayer); #if defined(MT_DEBUG) TrackSliceComplexities (pCtx, iCurDid); #endif//#if defined(MT_DEBUG) @@ -4814,17 +4813,6 @@ int32_t DynSliceRealloc (sWelsEncCtx* pCtx, pMA->WelsFree (pCurLayer->sSliceEncCtx.pCountMbNumInSlice, "pSliceSeg->pCountMbNumInSlice"); pCurLayer->sSliceEncCtx.pCountMbNumInSlice = pCountMbNumInSlice; - uint32_t* pSliceConsumeTime = (uint32_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (uint32_t), - "pSliceSeg->pSliceConsumeTime"); - if (NULL == pSliceConsumeTime) { - WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, - "CWelsH264SVCEncoder::DynSliceRealloc: realloc pSliceConsumeTime not successful"); - return ENC_RETURN_MEMALLOCERR; - } - memcpy (pSliceConsumeTime, pCurLayer->sSliceEncCtx.pSliceConsumeTime, sizeof (int32_t) * iMaxSliceNumOld); 
- pMA->WelsFree (pCurLayer->sSliceEncCtx.pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime"); - pCurLayer->sSliceEncCtx.pSliceConsumeTime = pSliceConsumeTime; - //deal with rate control variables const int32_t kiCurDid = pCtx->uiDependencyId; SRCSlicing* pSlcingOverRc = (SRCSlicing*)pMA->WelsMalloc (iMaxSliceNum * sizeof (SRCSlicing), "SlicingOverRC"); diff --git a/codec/encoder/core/src/slice_multi_threading.cpp b/codec/encoder/core/src/slice_multi_threading.cpp index a7b866f9..78cf5ec0 100644 --- a/codec/encoder/core/src/slice_multi_threading.cpp +++ b/codec/encoder/core/src/slice_multi_threading.cpp @@ -128,23 +128,23 @@ void UpdateMbListNeighborParallel (SDqLayer* pCurDq, } while (iIdx <= kiEndMbInSlice); } -void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceConsume) { +void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq) { SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + SSlice* pSliceInLayer = pCurDq->sLayerInfo.pSliceInLayer; int32_t* pRatioList = (int32_t*)pRatio; - int32_t iAvI[MAX_SLICES_NUM]; int32_t iSumAv = 0; - uint32_t* pSliceTime = (uint32_t*)pSliceConsume; int32_t* pCountMbInSlice = (int32_t*)pSliceCtx->pCountMbNumInSlice; const int32_t kiSliceCount = pSliceCtx->iSliceNumInFrame; int32_t iSliceIdx = 0; + int32_t iAvI[MAX_SLICES_NUM]; WelsEmms(); while (iSliceIdx < kiSliceCount) { - iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceTime[iSliceIdx]); - MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), pSliceConsumeTime[%d]= %d us, slice_run= %d", + iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceInLayer[iSliceIdx].uiSliceConsumeTime); + MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), uiSliceConsumeTime[%d]= %d us, slice_run= %d", iSliceIdx, - pSliceTime[iSliceIdx], pCountMbInSlice[iSliceIdx]); + pSliceInLayer[iSliceIdx].uiSliceConsumeTime, pCountMbInSlice[iSliceIdx]); iSumAv += iAvI[iSliceIdx]; ++ iSliceIdx; @@ -154,8 
+154,10 @@ void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceCons } } -int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t iSliceNum) { - uint32_t* pSliceConsume = (uint32_t*)pConsumeTime; +int32_t NeedDynamicAdjust (SSlice* pSliceInLayer, const int32_t iSliceNum) { + if ( NULL == pSliceInLayer ) + return false; + uint32_t uiTotalConsume = 0; int32_t iSliceIdx = 0; int32_t iNeedAdj = false; @@ -163,7 +165,7 @@ int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t iSliceNum) { WelsEmms(); while (iSliceIdx < iSliceNum) { - uiTotalConsume += pSliceConsume[iSliceIdx]; + uiTotalConsume += pSliceInLayer[iSliceIdx].uiSliceConsumeTime; iSliceIdx ++; } if (uiTotalConsume == 0) { @@ -178,7 +180,7 @@ int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t iSliceNum) { float fRmse = .0f; // root mean square error of pSlice consume ratios const float kfMeanRatio = 1.0f / iSliceNum; do { - const float fRatio = 1.0f * pSliceConsume[iSliceIdx] / uiTotalConsume; + const float fRatio = 1.0f * pSliceInLayer[iSliceIdx].uiSliceConsumeTime / uiTotalConsume; const float fDiffRatio = fRatio - kfMeanRatio; fRmse += (fDiffRatio * fDiffRatio); ++ iSliceIdx; @@ -370,12 +372,9 @@ int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingPara && (pPara->bUseLoadBalancing) && (pPara->iMultipleThreadIdc > 1) && (pPara->iMultipleThreadIdc >= kiSliceNum)) { - pSmt->pSliceConsumeTime[iIdx] = (uint32_t*)pMa->WelsMallocz (kiSliceNum * sizeof (uint32_t), "pSliceConsumeTime[]"); - WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceConsumeTime[iIdx]), FreeMemorySvc (ppCtx)) pSmt->pSliceComplexRatio[iIdx] = (int32_t*)pMa->WelsMalloc (kiSliceNum * sizeof (int32_t), "pSliceComplexRatio[]"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceComplexRatio[iIdx]), FreeMemorySvc (ppCtx)) } else { - pSmt->pSliceConsumeTime[iIdx] = NULL; pSmt->pSliceComplexRatio[iIdx] = NULL; } @@ -384,7 +383,6 @@ int32_t RequestMtResource (sWelsEncCtx** ppCtx, 
SWelsSvcCodingParam* pCodingPara } ++ iIdx; } - // NULL for pSliceConsumeTime[iIdx]: iIdx from iNumSpatialLayers to MAX_DEPENDENCY_LAYERS #ifdef MT_DEBUG // file handle for MT debug @@ -554,11 +552,7 @@ void ReleaseMtResource (sWelsEncCtx** ppCtx) { iIdx = 0; while (iIdx < pCodingParam->iSpatialLayerNum) { - if (pSmt->pSliceConsumeTime[iIdx]) { - pMa->WelsFree (pSmt->pSliceConsumeTime[iIdx], "pSliceConsumeTime[]"); - pSmt->pSliceConsumeTime[iIdx] = NULL; - } - if (pSmt->pSliceComplexRatio[iIdx] != NULL) { + if (pSmt->pSliceComplexRatio[iIdx] != NULL) { pMa->WelsFree (pSmt->pSliceComplexRatio[iIdx], "pSliceComplexRatio[]"); pSmt->pSliceComplexRatio[iIdx] = NULL; } @@ -814,12 +808,12 @@ WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc (void* arg) { pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, iSliceIdx); if (bDsaFlag) { - pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx] = (uint32_t) ( + pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime = (uint32_t) ( WelsTime() - iSliceStart); MT_TRACE_LOG (& (pEncPEncCtx->sLogCtx), WELS_LOG_INFO, - "[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d", + "[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, uiSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d", pEncPEncCtx->iCodingIndex, iSliceIdx, - pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx], iSliceSize, + pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime, iSliceSize, pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice, pCurDq->sSliceEncCtx.pCountMbNumInSlice[iSliceIdx]); } @@ -1067,10 +1061,9 @@ int32_t AdjustBaseLayer (sWelsEncCtx* pCtx) { #endif//MT_DEBUG pCtx->pCurDqLayer = pCurDq; - memcpy 
((pCtx->pSliceThreading->pSliceConsumeTime[0]), (pCurDq->sSliceEncCtx.pSliceConsumeTime), - pCurDq->sSliceEncCtx.iSliceNumInFrame * sizeof (uint32_t)); + // do not need adjust due to not different at both slices of consumed time - iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[0], pCurDq->sSliceEncCtx.iSliceNumInFrame); + iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[0]->sLayerInfo.pSliceInLayer, pCurDq->sSliceEncCtx.iSliceNumInFrame); if (iNeedAdj) DynamicAdjustSlicing (pCtx, pCurDq, @@ -1100,12 +1093,10 @@ int32_t AdjustEnhanceLayer (sWelsEncCtx* pCtx, int32_t iCurDid) { && (pCtx->pSvcParam->sSpatialLayers[iCurDid - 1].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE && pCtx->pSvcParam->iMultipleThreadIdc >= pCtx->pSvcParam->sSpatialLayers[iCurDid - 1].sSliceArgument.uiSliceNum); - memcpy ((pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]), (pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime), - pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame * sizeof (uint32_t)); if (kbModelingFromSpatial) { // using spatial base layer for complexity estimation // do not need adjust due to not different at both slices of consumed time - iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[iCurDid - 1], + iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid - 1]->sLayerInfo.pSliceInLayer, pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame); if (iNeedAdj) DynamicAdjustSlicing (pCtx, @@ -1115,7 +1106,7 @@ int32_t AdjustEnhanceLayer (sWelsEncCtx* pCtx, int32_t iCurDid) { ); } else { // use temporal layer for complexity estimation // do not need adjust due to not different at both slices of consumed time - iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[iCurDid], + iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid]->sLayerInfo.pSliceInLayer, pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame); if (iNeedAdj) DynamicAdjustSlicing (pCtx, @@ -1163,11 +1154,11 @@ void TrackSliceConsumeTime (sWelsEncCtx* pCtx, 
int32_t* pDidList, const int32_t pPara = pCtx->pSvcParam; while (iSpatialIdx < iSpatialNum) { - const int32_t kiDid = pDidList[iSpatialIdx]; - SSpatialLayerInternal* pDlp = &pPara->sDependencyLayers[kiDid]; - SSliceConfig* pSliceArgument = &pDlp->sSliceArgument; - SDqLayer* pCurDq = pCtx->ppDqLayerList[kiDid]; - SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; + const int32_t kiDid = pDidList[iSpatialIdx]; + SSliceConfig* pSliceArgument = &pPara->sDependencyLayers[kiDid].sSliceArgument; + SDqLayer* pCurDq = pCtx->ppDqLayerList[kiDid]; + SSlice* pSliceInLayer = pCurDq->sLayerInfo.pSliceInLayer; + SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; const uint32_t kuiCountSliceNum = pSliceCtx->iSliceNumInFrame; if (pCtx->pSliceThreading) { if (pCtx->pSliceThreading->pFSliceDiff @@ -1178,11 +1169,11 @@ void TrackSliceConsumeTime (sWelsEncCtx* pCtx, int32_t* pDidList, const int32_t uint32_t uiMaxT = 0; int32_t iMaxI = 0; while (i < kuiCountSliceNum) { - if (pCtx->pSliceThreading->pSliceConsumeTime[kiDid] != NULL) + if (pSliceInLayer != NULL) /* null-check the slice array pointer; an SSlice element is an object and cannot be compared with NULL */ fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time coding_idx %d iDid %d pSlice %d\n", - pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i], pCtx->iCodingIndex, kiDid, i /*/ 1000*/); - if (pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i] > uiMaxT) { - uiMaxT = pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i]; + pSliceInLayer[i].uiSliceConsumeTime, pCtx->iCodingIndex, kiDid, i /*/ 1000*/); + if (pSliceInLayer[i].uiSliceConsumeTime > uiMaxT) { + uiMaxT = pSliceInLayer[i].uiSliceConsumeTime; iMaxI = i; } ++ i; diff --git a/codec/encoder/core/src/svc_enc_slice_segment.cpp b/codec/encoder/core/src/svc_enc_slice_segment.cpp index ae8ad32d..21eb31e1 100644 --- a/codec/encoder/core/src/svc_enc_slice_segment.cpp +++ b/codec/encoder/core/src/svc_enc_slice_segment.cpp @@ -388,11 +388,7 @@ int32_t InitSliceSegment (SDqLayer* pCurDq, pSliceSeg->pCountMbNumInSlice = NULL; } - if (NULL != pSliceSeg->pSliceConsumeTime) { -
pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime"); - pSliceSeg->pSliceConsumeTime = NULL; - } // just for safe pSliceSeg->iSliceNumInFrame = 0; pSliceSeg->iMbNumInFrame = 0; @@ -411,7 +407,6 @@ int32_t InitSliceSegment (SDqLayer* pCurDq, "pSliceSeg->pCountMbNumInSlice"); WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice) - pSliceSeg->pSliceConsumeTime = NULL; pSliceSeg->uiSliceMode = uiSliceMode; pSliceSeg->iMbWidth = kiMbWidth; pSliceSeg->iMbHeight = kiMbHeight; @@ -438,10 +433,6 @@ int32_t InitSliceSegment (SDqLayer* pCurDq, "pSliceSeg->pCountMbNumInSlice"); WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice) - pSliceSeg->pSliceConsumeTime = (uint32_t*)pMa->WelsMalloc (pSliceSeg->iSliceNumInFrame * sizeof (uint32_t), - "pSliceSeg->pSliceConsumeTime"); - WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pSliceConsumeTime) - pSliceSeg->uiSliceMode = pSliceArgument->uiSliceMode; pSliceSeg->iMbWidth = kiMbWidth; @@ -489,11 +480,6 @@ void UninitSliceSegment (SDqLayer* pCurDq, CMemoryAlign* pMa) { pSliceSeg->pCountMbNumInSlice = NULL; } - if (NULL != pSliceSeg->pSliceConsumeTime) { - pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime"); - - pSliceSeg->pSliceConsumeTime = NULL; - } pSliceSeg->iMbNumInFrame = 0; pSliceSeg->iMbWidth = 0; diff --git a/codec/encoder/core/src/wels_task_encoder.cpp b/codec/encoder/core/src/wels_task_encoder.cpp index fff9cbb5..c6e05682 100644 --- a/codec/encoder/core/src/wels_task_encoder.cpp +++ b/codec/encoder/core/src/wels_task_encoder.cpp @@ -216,12 +216,12 @@ WelsErrorType CWelsLoadBalancingSlicingEncodingTask::InitTask() { void CWelsLoadBalancingSlicingEncodingTask::FinishTask() { CWelsSliceEncodingTask::FinishTask(); - m_pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime[m_iSliceIdx] = (uint32_t) (WelsTime() - m_iSliceStart); + m_pSlice->uiSliceConsumeTime = (uint32_t) (WelsTime() - m_iSliceStart); WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG, - "[MT] 
CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d", + "[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, uiSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d", m_pCtx->iCodingIndex, m_iSliceIdx, - m_pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime[m_iSliceIdx], + m_pSlice->uiSliceConsumeTime, m_iSliceSize, m_pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[m_iSliceIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice, m_pCtx->pCurDqLayer->sSliceEncCtx.pCountMbNumInSlice[m_iSliceIdx]);