Use WelsMultipleEventsWaitSingleBlocking with a master event for waiting on finished threads
This allows using the same code path on both Unix and Windows for distributing new slices to be coded to the worker threads. This also improves the performance on Unix: instead of waiting for all the current threads to finish their current slice before handing out a new slice to each of them (where the threads that finish first just wait instead of immediately getting a new slice to work on), we now use the same logic as on Windows. In one setup, it improves the encoding performance from ~920 fps to ~950 fps, and in another setup it goes from ~390 fps to ~660 fps. (These tests were done with the SM_ROWMB_SLICE mode, which heavily exercises the code for distributing new slices to the worker threads.) The extra WelsEventSignal call on Windows, where it isn't strictly necessary, doesn't incur any measurable slowdown, so it is kept without any extra ifdefs to keep the code more readable and unified.
This commit is contained in:
parent
d0a81355b0
commit
801da26d1d
@ -94,6 +94,7 @@ SSliceThreadPrivateData* pThreadPEncCtx;// thread context, [iThreadIdx]
|
||||
char eventNamespace[100];
|
||||
WELS_THREAD_HANDLE pThreadHandles[MAX_THREADS_NUM];// thread handles, [iThreadIdx]
|
||||
WELS_EVENT pSliceCodedEvent[MAX_THREADS_NUM];// events for slice coded state, [iThreadIdx]
|
||||
WELS_EVENT pSliceCodedMasterEvent; // event for signalling that some event in pSliceCodedEvent has been signalled
|
||||
WELS_EVENT pReadySliceCodingEvent[MAX_THREADS_NUM]; // events for slice coding ready, [iThreadIdx]
|
||||
WELS_EVENT pUpdateMbListEvent[MAX_THREADS_NUM]; // signal to update mb list neighbor for various slices
|
||||
WELS_EVENT pFinUpdateMbListEvent[MAX_THREADS_NUM]; // signal to indicate finish updating mb list
|
||||
|
@ -3225,7 +3225,7 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo * pFbi, const SSou
|
||||
return ENC_RETURN_UNEXPECTED;
|
||||
}
|
||||
|
||||
WelsMultipleEventsWaitAllBlocking (iSliceCount, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
|
||||
WelsMultipleEventsWaitAllBlocking (iSliceCount, &pCtx->pSliceThreading->pSliceCodedEvent[0], &pCtx->pSliceThreading->pSliceCodedMasterEvent);
|
||||
|
||||
|
||||
// all slices are finished coding here
|
||||
@ -3266,12 +3266,12 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo * pFbi, const SSou
|
||||
while (1) {
|
||||
if (iIndexOfSliceToBeCoded >= iSliceCount && iNumThreadsRunning <= 0)
|
||||
break;
|
||||
#ifdef _WIN32
|
||||
WELS_THREAD_ERROR_CODE lwait = 0;
|
||||
int32_t iEventId = -1;
|
||||
|
||||
lwait = WelsMultipleEventsWaitSingleBlocking (iNumThreadsScheduled,
|
||||
&pCtx->pSliceThreading->pSliceCodedEvent[0]);
|
||||
&pCtx->pSliceThreading->pSliceCodedEvent[0],
|
||||
&pCtx->pSliceThreading->pSliceCodedMasterEvent);
|
||||
iEventId = (int32_t) (lwait - WELS_THREAD_ERROR_WAIT_OBJECT_0);
|
||||
if (iEventId >= 0 && iEventId < iNumThreadsScheduled) {
|
||||
if (iIndexOfSliceToBeCoded < iSliceCount) {
|
||||
@ -3285,29 +3285,6 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo * pFbi, const SSou
|
||||
-- iNumThreadsRunning;
|
||||
}
|
||||
}
|
||||
#else
|
||||
// TODO for pthread platforms
|
||||
// alternate implementation using blocking due non-blocking with timeout mode not support at wels thread lib, tune back if available
|
||||
WelsMultipleEventsWaitAllBlocking (iNumThreadsRunning, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
|
||||
WELS_VERIFY_RETURN_IFNEQ(pCtx->iEncoderError, ENC_RETURN_SUCCESS)
|
||||
if (iIndexOfSliceToBeCoded < iSliceCount) {
|
||||
int32_t iThreadIdx = 0;
|
||||
// pick up succeeding slices for threading if left
|
||||
while (iThreadIdx < iNumThreadsScheduled) {
|
||||
if (iIndexOfSliceToBeCoded >= iSliceCount)
|
||||
break;
|
||||
pCtx->pSliceThreading->pThreadPEncCtx[iThreadIdx].iSliceIndex = iIndexOfSliceToBeCoded;
|
||||
WelsEventSignal (&pCtx->pSliceThreading->pReadySliceCodingEvent[iThreadIdx]);
|
||||
|
||||
++ iIndexOfSliceToBeCoded;
|
||||
++ iThreadIdx;
|
||||
}
|
||||
// update iNumThreadsRunning
|
||||
iNumThreadsRunning = iThreadIdx;
|
||||
} else {
|
||||
iNumThreadsRunning = 0;
|
||||
}
|
||||
#endif//_WIN32
|
||||
}//while(1)
|
||||
|
||||
// all slices are finished coding here
|
||||
@ -3329,7 +3306,7 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo * pFbi, const SSou
|
||||
return ENC_RETURN_UNEXPECTED;
|
||||
}
|
||||
|
||||
WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
|
||||
WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0], &pCtx->pSliceThreading->pSliceCodedMasterEvent);
|
||||
WELS_VERIFY_RETURN_IFNEQ(pCtx->iEncoderError, ENC_RETURN_SUCCESS)
|
||||
|
||||
iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, kiPartitionCnt);
|
||||
|
@ -351,10 +351,11 @@ int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingPara
|
||||
|
||||
MT_TRACE_LOG ((*ppCtx), WELS_LOG_INFO, "encpEncCtx= 0x%p\n", (void*) (*ppCtx));
|
||||
|
||||
char name[SEM_NAME_MAX] = {0};
|
||||
WELS_THREAD_ERROR_CODE err = 0;
|
||||
|
||||
iIdx = 0;
|
||||
while (iIdx < iThreadNum) {
|
||||
char name[SEM_NAME_MAX] = {0};
|
||||
WELS_THREAD_ERROR_CODE err = 0;
|
||||
pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx = (void*) (*ppCtx);
|
||||
pSmt->pThreadPEncCtx[iIdx].iSliceIndex = iIdx;
|
||||
pSmt->pThreadPEncCtx[iIdx].iThreadIndex = iIdx;
|
||||
@ -386,6 +387,10 @@ int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingPara
|
||||
++ iIdx;
|
||||
}
|
||||
|
||||
WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
|
||||
err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name);
|
||||
MT_TRACE_LOG ((*ppCtx), WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d\n", name, err, errno);
|
||||
|
||||
(*ppCtx)->pSliceBs = (SWelsSliceBs*)pMa->WelsMalloc (sizeof (SWelsSliceBs) * iMaxSliceNum, "pSliceBs");
|
||||
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSliceBs), FreeMemorySvc (ppCtx))
|
||||
|
||||
@ -444,8 +449,8 @@ void ReleaseMtResource (sWelsEncCtx** ppCtx) {
|
||||
if (NULL == pSmt)
|
||||
return;
|
||||
|
||||
char ename[SEM_NAME_MAX] = {0};
|
||||
while (iIdx < iThreadNum) {
|
||||
char ename[SEM_NAME_MAX] = {0};
|
||||
// length of semaphore name should be system constrained at least on mac 10.7
|
||||
#ifdef _WIN32
|
||||
if (pSmt->pThreadHandles != NULL && pSmt->pThreadHandles[iIdx] != NULL)
|
||||
@ -467,6 +472,8 @@ void ReleaseMtResource (sWelsEncCtx** ppCtx) {
|
||||
|
||||
++ iIdx;
|
||||
}
|
||||
WelsSnprintf (ename, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
|
||||
WelsEventClose (&pSmt->pSliceCodedMasterEvent, ename);
|
||||
|
||||
WelsMutexDestroy (&pSmt->mutexSliceNumUpdate);
|
||||
WelsMutexDestroy (&((*ppCtx)->mutexEncoderError));
|
||||
@ -864,6 +871,8 @@ WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc (void* arg) {
|
||||
|
||||
WelsEventSignal (
|
||||
&pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]); // mean finished coding current pSlice
|
||||
WelsEventSignal (
|
||||
&pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent);
|
||||
} else { // for SM_DYN_SLICE parallelization
|
||||
SSliceCtx* pSliceCtx = pCurDq->pSliceEncCtx;
|
||||
const int32_t kiPartitionId = iThreadIdx;
|
||||
@ -967,6 +976,7 @@ WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc (void* arg) {
|
||||
break;
|
||||
|
||||
WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]); // mean finished coding current pSlice
|
||||
WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent);
|
||||
}
|
||||
}
|
||||
#ifdef _WIN32
|
||||
|
Loading…
x
Reference in New Issue
Block a user