From f6ce43f83bd8f6f453c6a642acd62aba3f339b37 Mon Sep 17 00:00:00 2001 From: dongzha Date: Tue, 27 May 2014 16:59:00 +0800 Subject: [PATCH] Use Int instead of Double in Rate Control and Modify anchor SHA1 value --- codec/common/inc/macros.h | 9 + codec/encoder/core/inc/mt_defs.h | 2 +- codec/encoder/core/inc/rc.h | 33 ++-- codec/encoder/core/src/ratectl.cpp | 174 +++++++++--------- .../core/src/slice_multi_threading.cpp | 24 +-- .../core/src/svc_enc_slice_segment.cpp | 2 +- codec/encoder/core/src/wels_preprocess.cpp | 2 +- codec/processing/interface/IWelsVP.h | 2 +- .../AdaptiveQuantization.cpp | 80 ++++---- codec/processing/src/common/util.h | 4 + test/api/decode_encode_test.cpp | 4 +- test/api/encoder_test.cpp | 14 +- 12 files changed, 187 insertions(+), 163 deletions(-) diff --git a/codec/common/inc/macros.h b/codec/common/inc/macros.h index 11a46a75..164c4b5e 100644 --- a/codec/common/inc/macros.h +++ b/codec/common/inc/macros.h @@ -44,6 +44,7 @@ #include #include "typedefs.h" + /* * ENFORCE_STACK_ALIGN_1D: force 1 dimension local data aligned in stack * _tp: type @@ -125,6 +126,14 @@ __declspec(align(alignment)) type name[(sizex)*(sizey)] #define WELS_ROUND(x) ((int32_t)(0.5+(x))) #endif//WELS_ROUND +#ifndef WELS_DIV_ROUND +#define WELS_DIV_ROUND(x,y) ((int32_t)((y)==0?((x)/((y)+1)):(((y)/2+(x))/(y)))) +#endif//WELS_DIV_ROUND + +#ifndef WELS_DIV_ROUND64 +#define WELS_DIV_ROUND64(x,y) ((int64_t)((y)==0?((x)/((y)+1)):(((y)/2+(x))/(y)))) +#endif//WELS_DIV_ROUND64 + #define WELS_NON_ZERO_COUNT_AVERAGE(nC,nA,nB) { \ nC = nA + nB + 1; \ nC >>= (uint8_t)( nA != -1 && nB != -1); \ diff --git a/codec/encoder/core/inc/mt_defs.h b/codec/encoder/core/inc/mt_defs.h index 8c136507..9600d8e7 100644 --- a/codec/encoder/core/inc/mt_defs.h +++ b/codec/encoder/core/inc/mt_defs.h @@ -85,7 +85,7 @@ typedef struct TagSliceThreading { WELS_MUTEX mutexSliceNumUpdate; // for dynamic slicing mode MT uint32_t* pSliceConsumeTime[MAX_DEPENDENCY_LAYER]; // consuming time for each slice, 
[iSpatialIdx][uiSliceIdx] - float* pSliceComplexRatio[MAX_DEPENDENCY_LAYER]; + int32_t* pSliceComplexRatio[MAX_DEPENDENCY_LAYER]; // *INT_MULTIPLY #ifdef MT_DEBUG FILE* pFSliceDiff; // file handle for debug diff --git a/codec/encoder/core/inc/rc.h b/codec/encoder/core/inc/rc.h index 1a24630d..926af856 100644 --- a/codec/encoder/core/inc/rc.h +++ b/codec/encoder/core/inc/rc.h @@ -107,18 +107,21 @@ enum { //bits allocation #define MAX_BITS_VARY_PERCENTAGE 100 //bits vary range in percentage +#define INT_MULTIPLY 100 // use to multiply in Double to Int Conversion, should be same as AQ_QSTEP_INT_MULTIPLY in WelsVP +#define WEIGHT_MULTIPLY 2000 +#define REMAIN_BITS_TH (10) // *INT_MULTIPLY #define VGOP_BITS_PERCENTAGE_DIFF 5 -#define IDR_BITRATE_RATIO 4.0 -#define FRAME_iTargetBits_VARY_RANGE 0.5 +#define IDR_BITRATE_RATIO 4 +#define FRAME_iTargetBits_VARY_RANGE 50 // *INT_MULTIPLY //R-Q Model -#define LINEAR_MODEL_DECAY_FACTOR 0.8 -#define FRAME_CMPLX_RATIO_RANGE 0.1 -#define SMOOTH_FACTOR_MIN_VALUE 0.02 +#define LINEAR_MODEL_DECAY_FACTOR 80 // *INT_MULTIPLY +#define FRAME_CMPLX_RATIO_RANGE 10 // *INT_MULTIPLY +#define SMOOTH_FACTOR_MIN_VALUE 2 // *INT_MULTIPLY //#define VGOP_BITS_MIN_RATIO 0.8 //skip and padding -#define SKIP_RATIO 0.5 -#define PADDING_BUFFER_RATIO 0.5 -#define PADDING_THRESHOLD 0.05 +#define SKIP_RATIO 50 // *INT_MULTIPLY +#define PADDING_BUFFER_RATIO 50 // *INT_MULTIPLY +#define PADDING_THRESHOLD 5 //*INT_MULTIPLY typedef struct TagRCSlicing { int32_t iComplexityIndexSlice; @@ -138,10 +141,10 @@ typedef struct TagRCSlicing { typedef struct TagRCTemporal { int32_t iMinBitsTl; int32_t iMaxBitsTl; - double dTlayerWeight; + int32_t iTlayerWeight; int32_t iGopBitsDq; //P frame level R-Q Model - double dLinearCmplx; + int64_t iLinearCmplx; // *INT_MULTIPLY int32_t iPFrameNum; int32_t iFrameCmplxMean; @@ -149,14 +152,14 @@ typedef struct TagRCTemporal { typedef struct TagWelsRc { int32_t iRcVaryPercentage; - double dRcVaryRatio; + int32_t iRcVaryRatio; 
int32_t iInitialQp; //initial qp int32_t iBitRate; int32_t iPreviousBitrate; int32_t iPreviousGopSize; double fFrameRate; - double dBitsPerFrame; + int32_t iBitsPerFrame; // *INT_MULTIPLY double dPreviousFps; // bits allocation and status @@ -169,7 +172,7 @@ typedef struct TagWelsRc { int32_t iIntraMbCount; int8_t iTlOfFrames[VGOP_SIZE]; - double dRemainingWeights; + int32_t iRemainingWeights; int32_t iFrameDqBits; double* pGomComplexity; @@ -195,9 +198,9 @@ typedef struct TagWelsRc { int32_t iMinQp; int32_t iMaxQp; //int32_t delta_adaptive_qp; - double dSkipBufferRatio; + int32_t iSkipBufferRatio; - double dQStep; + int32_t iQStep; // *INT_MULTIPLY int32_t iFrameDeltaQpUpper; int32_t iFrameDeltaQpLower; int32_t iLastCalculatedQScale; diff --git a/codec/encoder/core/src/ratectl.cpp b/codec/encoder/core/src/ratectl.cpp index 33fc7df5..e22fe8d1 100644 --- a/codec/encoder/core/src/ratectl.cpp +++ b/codec/encoder/core/src/ratectl.cpp @@ -56,6 +56,12 @@ FILE* fp_test_rc = NULL; FILE* fp_vgop = NULL; #endif #define _BITS_RANGE 0 +const int32_t g_kiQpToQstepTable[52] = { 63, 71, 79, 89, 100, 112, 126, 141, 159, 178, + 200, 224, 252, 283, 317, 356, 400, 449, 504, 566, + 635, 713, 800, 898, 1008, 1131, 1270, 1425, 1600, 1796, + 2016, 2263, 2540, 2851, 3200, 3592, 4032, 4525, 5080, 5702, + 6400, 7184, 8063, 9051,10159,11404,12800,14368,16127,18102, + 20319,22807}; //WELS_ROUND(INT_MULTIPLY*pow (2.0, (iQP - 4.0) / 6.0)) void RcInitLayerMemory (SWelsSvcRc* pWelsSvcRc, CMemoryAlign* pMA, const int32_t kiMaxTl) { const int32_t kiSliceNum = pWelsSvcRc->iSliceNum; @@ -96,11 +102,11 @@ void RcFreeLayerMemory (SWelsSvcRc* pWelsSvcRc, CMemoryAlign* pMA) { } } -static inline double RcConvertQp2QStep (double dQP) { - return pow (2.0, (dQP - 4.0) / 6.0); +static inline int32_t RcConvertQp2QStep (int32_t iQP) { + return g_kiQpToQstepTable[iQP]; } -static inline double RcConvertQStep2Qp (double dQpStep) { - return (6 * log (dQpStep) / log (2.0) + 4.0); +static inline int32_t 
RcConvertQStep2Qp (int32_t iQpStep) { + return WELS_ROUND((6 * log (iQpStep*1.0f/INT_MULTIPLY) / log (2.0) + 4.0)); } void RcInitSequenceParameter (sWelsEncCtx* pEncCtx) { @@ -125,14 +131,14 @@ void RcInitSequenceParameter (sWelsEncCtx* pEncCtx) { pWelsSvcRc->iSliceNum = pSliceCtx->iSliceNumInFrame; pWelsSvcRc->iRcVaryPercentage = _BITS_RANGE; // % -- for temp - pWelsSvcRc->dRcVaryRatio = (double)pWelsSvcRc->iRcVaryPercentage / MAX_BITS_VARY_PERCENTAGE; + pWelsSvcRc->iRcVaryRatio = pWelsSvcRc->iRcVaryPercentage; - pWelsSvcRc->dSkipBufferRatio = SKIP_RATIO; + pWelsSvcRc->iSkipBufferRatio = SKIP_RATIO; - pWelsSvcRc->iQpRangeUpperInFrame = QP_RANGE_UPPER_MODE1 - WELS_ROUND ((QP_RANGE_UPPER_MODE1 - QP_RANGE_MODE0) * - pWelsSvcRc->dRcVaryRatio); - pWelsSvcRc->iQpRangeLowerInFrame = QP_RANGE_LOWER_MODE1 - WELS_ROUND ((QP_RANGE_LOWER_MODE1 - QP_RANGE_MODE0) * - pWelsSvcRc->dRcVaryRatio); + pWelsSvcRc->iQpRangeUpperInFrame = (QP_RANGE_UPPER_MODE1 * MAX_BITS_VARY_PERCENTAGE - ((QP_RANGE_UPPER_MODE1 - QP_RANGE_MODE0) * + pWelsSvcRc->iRcVaryRatio)) / MAX_BITS_VARY_PERCENTAGE; + pWelsSvcRc->iQpRangeLowerInFrame = (QP_RANGE_LOWER_MODE1 * MAX_BITS_VARY_PERCENTAGE - ((QP_RANGE_LOWER_MODE1 - QP_RANGE_MODE0) * + pWelsSvcRc->iRcVaryRatio)) / MAX_BITS_VARY_PERCENTAGE; if (iMbWidth <= MB_WIDTH_THRESHOLD_90P) { pWelsSvcRc->iSkipQpValue = SKIP_QP_90P; @@ -151,17 +157,17 @@ void RcInitSequenceParameter (sWelsEncCtx* pEncCtx) { iGomRowMode0 = GOM_ROW_MODE0_720P; iGomRowMode1 = GOM_ROW_MODE1_720P; } - iGomRowMode0 = iGomRowMode1 + WELS_ROUND ((iGomRowMode0 - iGomRowMode1) * pWelsSvcRc->dRcVaryRatio); + iGomRowMode0 = iGomRowMode1 + ((iGomRowMode0 - iGomRowMode1) * pWelsSvcRc->iRcVaryRatio / MAX_BITS_VARY_PERCENTAGE); pWelsSvcRc->iNumberMbGom = iMbWidth * iGomRowMode0; pWelsSvcRc->iMinQp = GOM_MIN_QP_MODE; pWelsSvcRc->iMaxQp = GOM_MAX_QP_MODE; - pWelsSvcRc->iFrameDeltaQpUpper = LAST_FRAME_QP_RANGE_UPPER_MODE1 - WELS_ROUND ((LAST_FRAME_QP_RANGE_UPPER_MODE1 - - 
LAST_FRAME_QP_RANGE_UPPER_MODE0) * pWelsSvcRc->dRcVaryRatio); - pWelsSvcRc->iFrameDeltaQpLower = LAST_FRAME_QP_RANGE_LOWER_MODE1 - WELS_ROUND ((LAST_FRAME_QP_RANGE_LOWER_MODE1 - - LAST_FRAME_QP_RANGE_LOWER_MODE0) * pWelsSvcRc->dRcVaryRatio); + pWelsSvcRc->iFrameDeltaQpUpper = LAST_FRAME_QP_RANGE_UPPER_MODE1 - ((LAST_FRAME_QP_RANGE_UPPER_MODE1 - + LAST_FRAME_QP_RANGE_UPPER_MODE0) * pWelsSvcRc->iRcVaryRatio / MAX_BITS_VARY_PERCENTAGE); + pWelsSvcRc->iFrameDeltaQpLower = LAST_FRAME_QP_RANGE_LOWER_MODE1 - ((LAST_FRAME_QP_RANGE_LOWER_MODE1 - + LAST_FRAME_QP_RANGE_LOWER_MODE0) * pWelsSvcRc->iRcVaryRatio / MAX_BITS_VARY_PERCENTAGE); pWelsSvcRc->iSkipFrameNum = 0; pWelsSvcRc->iGomSize = (pWelsSvcRc->iNumberMbFrame + pWelsSvcRc->iNumberMbGom - 1) / pWelsSvcRc->iNumberMbGom; @@ -186,13 +192,14 @@ void RcInitTlWeight (sWelsEncCtx* pEncCtx) { const int32_t kiHighestTid = pDLayerParam->iHighestTemporalId; //Index 0:Virtual GOP size, Index 1:Frame rate - double WeightArray[4][4] = { {1.0, 0, 0, 0}, {0.6, 0.4, 0, 0}, {0.4, 0.3, 0.15, 0}, {0.25, 0.15, 0.125, 0.0875}}; + //double WeightArray[4][4] = { {1.0, 0, 0, 0}, {0.6, 0.4, 0, 0}, {0.4, 0.3, 0.15, 0}, {0.25, 0.15, 0.125, 0.0875}}; + int32_t iWeightArray[4][4] = { {2000, 0, 0, 0}, {1200, 800, 0, 0}, {800, 600, 300, 0}, {500, 300, 250, 175}}; // original*WEIGHT_MULTIPLY const int32_t kiGopSize = (1 << kiDecompositionStages); int32_t i, k, n; n = 0; while (n <= kiHighestTid) { - pTOverRc[n].dTlayerWeight = WeightArray[kiDecompositionStages][n]; + pTOverRc[n].iTlayerWeight = iWeightArray[kiDecompositionStages][n]; ++ n; } //Calculate the frame index for the current frame and its reference frame @@ -214,30 +221,30 @@ void RcUpdateBitrateFps (sWelsEncCtx* pEncCtx) { SDLayerParam* pDLayerParam = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId]; const int32_t kiGopSize = (1 << pDLayerParam->iDecompositionStages); const int32_t kiHighestTid = pDLayerParam->iHighestTemporalId; - double input_dBitsPerFrame = 
pDLayerParam->iSpatialBitrate / pDLayerParam->fInputFrameRate; - const int32_t kiGopBits = WELS_ROUND (input_dBitsPerFrame * kiGopSize); + int32_t input_iBitsPerFrame = WELS_ROUND(pDLayerParam->iSpatialBitrate * INT_MULTIPLY / pDLayerParam->fInputFrameRate); + const int32_t kiGopBits = WELS_DIV_ROUND(input_iBitsPerFrame * kiGopSize, INT_MULTIPLY); int32_t i; pWelsSvcRc->iBitRate = pDLayerParam->iSpatialBitrate; pWelsSvcRc->fFrameRate = pDLayerParam->fInputFrameRate; - double dTargetVaryRange = FRAME_iTargetBits_VARY_RANGE * (1.0 - pWelsSvcRc->dRcVaryRatio); - double dMinBitsRatio = 1.0 - dTargetVaryRange; - double dMaxBitsRatio = 1.0 + FRAME_iTargetBits_VARY_RANGE;//dTargetVaryRange; + int32_t iTargetVaryRange = FRAME_iTargetBits_VARY_RANGE * (MAX_BITS_VARY_PERCENTAGE - pWelsSvcRc->iRcVaryRatio); + int32_t iMinBitsRatio = (MAX_BITS_VARY_PERCENTAGE) * INT_MULTIPLY - iTargetVaryRange; + int32_t iMaxBitsRatio = (MAX_BITS_VARY_PERCENTAGE) * (INT_MULTIPLY + FRAME_iTargetBits_VARY_RANGE); for (i = 0; i <= kiHighestTid; i++) { - const double kdConstraitBits = kiGopBits * pTOverRc[i].dTlayerWeight; - pTOverRc[i].iMinBitsTl = WELS_ROUND (kdConstraitBits * dMinBitsRatio); - pTOverRc[i].iMaxBitsTl = WELS_ROUND (kdConstraitBits * dMaxBitsRatio); + const int64_t kdConstraitBits = kiGopBits * pTOverRc[i].iTlayerWeight; + pTOverRc[i].iMinBitsTl = WELS_DIV_ROUND(kdConstraitBits * iMinBitsRatio, INT_MULTIPLY * MAX_BITS_VARY_PERCENTAGE * WEIGHT_MULTIPLY); + pTOverRc[i].iMaxBitsTl = WELS_DIV_ROUND(kdConstraitBits * iMaxBitsRatio, INT_MULTIPLY * MAX_BITS_VARY_PERCENTAGE * WEIGHT_MULTIPLY); } //When bitrate is changed, pBuffer size should be updated - pWelsSvcRc->iBufferSizeSkip = WELS_ROUND (pWelsSvcRc->iBitRate * pWelsSvcRc->dSkipBufferRatio); - pWelsSvcRc->iBufferSizePadding = WELS_ROUND (pWelsSvcRc->iBitRate * PADDING_BUFFER_RATIO); + pWelsSvcRc->iBufferSizeSkip = WELS_DIV_ROUND(pWelsSvcRc->iBitRate * pWelsSvcRc->iSkipBufferRatio, INT_MULTIPLY); + pWelsSvcRc->iBufferSizePadding = 
WELS_DIV_ROUND(pWelsSvcRc->iBitRate * PADDING_BUFFER_RATIO, INT_MULTIPLY); //change remaining bits - if (pWelsSvcRc->dBitsPerFrame > 0.1) - pWelsSvcRc->iRemainingBits = WELS_ROUND (pWelsSvcRc->iRemainingBits * input_dBitsPerFrame / pWelsSvcRc->dBitsPerFrame); - pWelsSvcRc->dBitsPerFrame = input_dBitsPerFrame; + if (pWelsSvcRc->iBitsPerFrame > REMAIN_BITS_TH) + pWelsSvcRc->iRemainingBits = pWelsSvcRc->iRemainingBits * input_iBitsPerFrame / pWelsSvcRc->iBitsPerFrame; + pWelsSvcRc->iBitsPerFrame = input_iBitsPerFrame; } @@ -247,8 +254,8 @@ void RcInitVGop (sWelsEncCtx* pEncCtx) { SRCTemporal* pTOverRc = pWelsSvcRc->pTemporalOverRc; const int32_t kiHighestTid = pEncCtx->pSvcParam->sDependencyLayers[kiDid].iHighestTemporalId; - pWelsSvcRc->iRemainingBits = WELS_ROUND (VGOP_SIZE * pWelsSvcRc->dBitsPerFrame); - pWelsSvcRc->dRemainingWeights = pWelsSvcRc->iGopNumberInVGop; + pWelsSvcRc->iRemainingBits = WELS_DIV_ROUND(VGOP_SIZE * pWelsSvcRc->iBitsPerFrame, INT_MULTIPLY); + pWelsSvcRc->iRemainingWeights = pWelsSvcRc->iGopNumberInVGop * WEIGHT_MULTIPLY; pWelsSvcRc->iFrameCodedInVGop = 0; pWelsSvcRc->iGopIndexInVGop = 0; @@ -273,7 +280,7 @@ void RcInitRefreshParameter (sWelsEncCtx* pEncCtx) { //P frame R-Q Model for (i = 0; i <= kiHighestTid; i++) { pTOverRc[i].iPFrameNum = 0; - pTOverRc[i].dLinearCmplx = 0.0; + pTOverRc[i].iLinearCmplx = 0; pTOverRc[i].iFrameCmplxMean = 0; } @@ -282,7 +289,7 @@ void RcInitRefreshParameter (sWelsEncCtx* pEncCtx) { pWelsSvcRc->iGopIndexInVGop = 0; pWelsSvcRc->iRemainingBits = 0; - pWelsSvcRc->dBitsPerFrame = 0.0; + pWelsSvcRc->iBitsPerFrame = 0; //Backup the initial bitrate and fps pWelsSvcRc->iPreviousBitrate = pDLayerParam->iSpatialBitrate; @@ -404,7 +411,7 @@ void RcInitIdrQp (sWelsEncCtx* pEncCtx) { pWelsSvcRc->iInitialQp = dInitialQPArray[iBppIndex][i]; pWelsSvcRc->iInitialQp = WELS_CLIP3 (pWelsSvcRc->iInitialQp, MIN_IDR_QP, MAX_IDR_QP); pEncCtx->iGlobalQp = pWelsSvcRc->iInitialQp; - pWelsSvcRc->dQStep = RcConvertQp2QStep 
(pEncCtx->iGlobalQp); + pWelsSvcRc->iQStep = RcConvertQp2QStep (pEncCtx->iGlobalQp); pWelsSvcRc->iLastCalculatedQScale = pEncCtx->iGlobalQp; } @@ -412,14 +419,14 @@ void RcCalculateIdrQp (sWelsEncCtx* pEncCtx) { SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; //obtain the idr qp using previous idr complexity if (pWelsSvcRc->iNumberMbFrame != pWelsSvcRc->iIntraMbCount) { - pWelsSvcRc->iIntraComplexity = WELS_ROUND ((double)pWelsSvcRc->iIntraComplexity * pWelsSvcRc->iNumberMbFrame / - pWelsSvcRc->iIntraMbCount); + pWelsSvcRc->iIntraComplexity = pWelsSvcRc->iIntraComplexity * pWelsSvcRc->iNumberMbFrame / + pWelsSvcRc->iIntraMbCount; } - pWelsSvcRc->iInitialQp = WELS_ROUND (RcConvertQStep2Qp ((double)pWelsSvcRc->iIntraComplexity / - pWelsSvcRc->iTargetBits)); + pWelsSvcRc->iInitialQp = RcConvertQStep2Qp (pWelsSvcRc->iIntraComplexity / + pWelsSvcRc->iTargetBits); pWelsSvcRc->iInitialQp = WELS_CLIP3 (pWelsSvcRc->iInitialQp, MIN_IDR_QP, MAX_IDR_QP); pEncCtx->iGlobalQp = pWelsSvcRc->iInitialQp; - pWelsSvcRc->dQStep = RcConvertQp2QStep (pEncCtx->iGlobalQp); + pWelsSvcRc->iQStep = RcConvertQp2QStep (pEncCtx->iGlobalQp); pWelsSvcRc->iLastCalculatedQScale = pEncCtx->iGlobalQp; } @@ -450,22 +457,22 @@ void RcCalculatePictureQp (sWelsEncCtx* pEncCtx) { pWelsSvcRc->iLastCalculatedQScale + pWelsSvcRc->iFrameDeltaQpUpper + iDeltaQpTemporal); iLumaQp = WELS_CLIP3 (iLumaQp, GOM_MIN_QP_MODE, MAX_LOW_BR_QP); - pWelsSvcRc->dQStep = RcConvertQp2QStep (iLumaQp); + pWelsSvcRc->iQStep = RcConvertQp2QStep (iLumaQp); pWelsSvcRc->iLastCalculatedQScale = iLumaQp; if (pEncCtx->pSvcParam->bEnableAdaptiveQuant) { - iLumaQp = WELS_ROUND (iLumaQp - pEncCtx->pVaa->sAdaptiveQuantParam.dAverMotionTextureIndexToDeltaQp); + iLumaQp = WELS_CLIP3 ((iLumaQp*INT_MULTIPLY - pEncCtx->pVaa->sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp)/INT_MULTIPLY, GOM_MIN_QP_MODE, MAX_LOW_BR_QP); } pEncCtx->iGlobalQp = iLumaQp; return; } else { - double dCmplxRatio = 
(double)pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity / pTOverRc->iFrameCmplxMean; - dCmplxRatio = WELS_CLIP3 (dCmplxRatio, 1.0 - FRAME_CMPLX_RATIO_RANGE, 1.0 + FRAME_CMPLX_RATIO_RANGE); + int64_t iCmplxRatio = WELS_DIV_ROUND64(pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity *INT_MULTIPLY, pTOverRc->iFrameCmplxMean); + iCmplxRatio = WELS_CLIP3 (iCmplxRatio, INT_MULTIPLY - FRAME_CMPLX_RATIO_RANGE, INT_MULTIPLY + FRAME_CMPLX_RATIO_RANGE); - pWelsSvcRc->dQStep = pTOverRc->dLinearCmplx * dCmplxRatio / pWelsSvcRc->iTargetBits; - iLumaQp = WELS_ROUND (RcConvertQStep2Qp (pWelsSvcRc->dQStep)); + pWelsSvcRc->iQStep = WELS_DIV_ROUND((pTOverRc->iLinearCmplx * iCmplxRatio), (pWelsSvcRc->iTargetBits * INT_MULTIPLY)); + iLumaQp = RcConvertQStep2Qp (pWelsSvcRc->iQStep); //limit QP int32_t iLastIdxCodecInVGop = pWelsSvcRc->iFrameCodedInVGop - 1; @@ -485,12 +492,12 @@ void RcCalculatePictureQp (sWelsEncCtx* pEncCtx) { iLumaQp = WELS_CLIP3 (iLumaQp, GOM_MIN_QP_MODE, GOM_MAX_QP_MODE); - pWelsSvcRc->dQStep = RcConvertQp2QStep (iLumaQp); + pWelsSvcRc->iQStep = RcConvertQp2QStep (iLumaQp); pWelsSvcRc->iLastCalculatedQScale = iLumaQp; #ifndef _NOT_USE_AQ_FOR_TEST_ if (pEncCtx->pSvcParam->bEnableAdaptiveQuant) { - iLumaQp = WELS_ROUND (iLumaQp - pEncCtx->pVaa->sAdaptiveQuantParam.dAverMotionTextureIndexToDeltaQp); + iLumaQp = WELS_DIV_ROUND(iLumaQp*INT_MULTIPLY - pEncCtx->pVaa->sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp,INT_MULTIPLY); if (pEncCtx->pSvcParam->iRCMode != RC_LOW_BW_MODE) iLumaQp = WELS_CLIP3 (iLumaQp, pWelsSvcRc->iMinQp, pWelsSvcRc->iMaxQp); @@ -505,7 +512,7 @@ void RcInitSliceInformation (sWelsEncCtx* pEncCtx) { SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; SRCSlicing* pSOverRc = &pWelsSvcRc->pSlicingOverRc[0]; const int32_t kiSliceNum = pCurSliceCtx->iSliceNumInFrame; - const double kdBitsPerMb = (double)pWelsSvcRc->iTargetBits / pWelsSvcRc->iNumberMbFrame; + const int32_t kdBitsPerMb = 
WELS_DIV_ROUND(pWelsSvcRc->iTargetBits * INT_MULTIPLY, pWelsSvcRc->iNumberMbFrame); for (int32_t i = 0; i < kiSliceNum; i++) { pSOverRc->iStartMbSlice = @@ -513,7 +520,7 @@ void RcInitSliceInformation (sWelsEncCtx* pEncCtx) { pSOverRc->iEndMbSlice += (pCurSliceCtx->pCountMbNumInSlice[i] - 1); pSOverRc->iTotalQpSlice = 0; pSOverRc->iTotalMbSlice = 0; - pSOverRc->iTargetBitsSlice = WELS_ROUND (kdBitsPerMb * pCurSliceCtx->pCountMbNumInSlice[i]); + pSOverRc->iTargetBitsSlice = WELS_DIV_ROUND(kdBitsPerMb * pCurSliceCtx->pCountMbNumInSlice[i], INT_MULTIPLY); pSOverRc->iFrameBitsSlice = 0; pSOverRc->iGomBitsSlice = 0; ++ pSOverRc; @@ -527,10 +534,10 @@ void RcDecideTargetBits (sWelsEncCtx* pEncCtx) { pWelsSvcRc->iCurrentBitsLevel = BITS_NORMAL; //allocate bits if (pEncCtx->eSliceType == I_SLICE) { - pWelsSvcRc->iTargetBits = WELS_ROUND (pWelsSvcRc->dBitsPerFrame * IDR_BITRATE_RATIO); + pWelsSvcRc->iTargetBits = WELS_DIV_ROUND(pWelsSvcRc->iBitsPerFrame * IDR_BITRATE_RATIO, INT_MULTIPLY); } else { - pWelsSvcRc->iTargetBits = WELS_ROUND (pWelsSvcRc->iRemainingBits * pTOverRc->dTlayerWeight / - pWelsSvcRc->dRemainingWeights); + pWelsSvcRc->iTargetBits = (pWelsSvcRc->iRemainingBits * pTOverRc->iTlayerWeight / + pWelsSvcRc->iRemainingWeights); if ((pWelsSvcRc->iTargetBits <= 0) && (pEncCtx->pSvcParam->iRCMode == RC_LOW_BW_MODE)) { pWelsSvcRc->iCurrentBitsLevel = BITS_EXCEEDED; } else if ((pWelsSvcRc->iTargetBits <= pTOverRc->iMinBitsTl) && (pEncCtx->pSvcParam->iRCMode == RC_LOW_BW_MODE)) { @@ -538,7 +545,7 @@ void RcDecideTargetBits (sWelsEncCtx* pEncCtx) { } pWelsSvcRc->iTargetBits = WELS_CLIP3 (pWelsSvcRc->iTargetBits, pTOverRc->iMinBitsTl, pTOverRc->iMaxBitsTl); } - pWelsSvcRc->dRemainingWeights -= pTOverRc->dTlayerWeight; + pWelsSvcRc->iRemainingWeights -= pTOverRc->iTlayerWeight; } @@ -602,7 +609,7 @@ void RcGomTargetBits (sWelsEncCtx* pEncCtx, const int32_t kiSliceId) { SWelsSvcRc* pWelsSvcRc_Base = NULL; SRCSlicing* pSOverRc = &pWelsSvcRc->pSlicingOverRc[kiSliceId]; - 
double dAllocateBits = 0; + int32_t iAllocateBits = 0; int32_t iSumSad = 0; int32_t iLastGomIndex = 0; int32_t iLeftBits = 0; @@ -616,7 +623,7 @@ void RcGomTargetBits (sWelsEncCtx* pEncCtx, const int32_t kiSliceId) { pSOverRc->iGomTargetBits = 0; return; } else if (kiComplexityIndex >= iLastGomIndex) { - dAllocateBits = iLeftBits; + iAllocateBits = iLeftBits; } else { pWelsSvcRc_Base = RcJudgeBaseUsability (pEncCtx); pWelsSvcRc_Base = (pWelsSvcRc_Base) ? pWelsSvcRc_Base : pWelsSvcRc; @@ -624,12 +631,12 @@ void RcGomTargetBits (sWelsEncCtx* pEncCtx, const int32_t kiSliceId) { iSumSad += pWelsSvcRc_Base->pCurrentFrameGomSad[i]; } if (0 == iSumSad) - dAllocateBits = (double)iLeftBits / (iLastGomIndex - kiComplexityIndex); + iAllocateBits = WELS_DIV_ROUND(iLeftBits, (iLastGomIndex - kiComplexityIndex)); else - dAllocateBits = (double)iLeftBits * pWelsSvcRc_Base->pCurrentFrameGomSad[kiComplexityIndex + 1] / iSumSad; + iAllocateBits = WELS_DIV_ROUND(iLeftBits * pWelsSvcRc_Base->pCurrentFrameGomSad[kiComplexityIndex + 1], iSumSad); } - pSOverRc->iGomTargetBits = WELS_ROUND (dAllocateBits); + pSOverRc->iGomTargetBits = iAllocateBits; } @@ -637,7 +644,7 @@ void RcGomTargetBits (sWelsEncCtx* pEncCtx, const int32_t kiSliceId) { void RcCalculateGomQp (sWelsEncCtx* pEncCtx, SMB* pCurMb, int32_t iSliceId) { SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; SRCSlicing* pSOverRc = &pWelsSvcRc->pSlicingOverRc[iSliceId]; - double dBitsRatio = 1.0; + int32_t iBitsRatio = 1; int32_t iLeftBits = pSOverRc->iTargetBitsSlice - pSOverRc->iFrameBitsSlice; int32_t iTargetLeftBits = iLeftBits + pSOverRc->iGomBitsSlice - pSOverRc->iGomTargetBits; @@ -646,14 +653,14 @@ void RcCalculateGomQp (sWelsEncCtx* pEncCtx, SMB* pCurMb, int32_t iSliceId) { pSOverRc->iCalculatedQpSlice += 2; } else { //globe decision - dBitsRatio = iLeftBits / (iTargetLeftBits + 0.1); - if (dBitsRatio < 0.8409) //2^(-1.5/6) + iBitsRatio = 10000 * iLeftBits / (iTargetLeftBits+1); + if (iBitsRatio < 
8409) //2^(-1.5/6)*10000 pSOverRc->iCalculatedQpSlice += 2; - else if (dBitsRatio < 0.9439) //2^(-0.5/6) + else if (iBitsRatio < 9439) //2^(-0.5/6)*10000 pSOverRc->iCalculatedQpSlice += 1; - else if (dBitsRatio > 1.06) //2^(0.5/6) + else if (iBitsRatio > 10600) //2^(0.5/6)*10000 pSOverRc->iCalculatedQpSlice -= 1; - else if (dBitsRatio > 1.19) //2^(1.5/6) + else if (iBitsRatio > 11900) //2^(1.5/6)*10000 pSOverRc->iCalculatedQpSlice -= 2; } @@ -669,11 +676,11 @@ void RcCalculateGomQp (sWelsEncCtx* pEncCtx, SMB* pCurMb, int32_t iSliceId) { void RcVBufferCalculationSkip (sWelsEncCtx* pEncCtx) { SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; SRCTemporal* pTOverRc = pWelsSvcRc->pTemporalOverRc; - const int32_t kiOutputBits = WELS_ROUND (pWelsSvcRc->dBitsPerFrame); + const int32_t kiOutputBits = WELS_DIV_ROUND(pWelsSvcRc->iBitsPerFrame, INT_MULTIPLY); //condition 1: whole pBuffer fullness pWelsSvcRc->iBufferFullnessSkip += (pWelsSvcRc->iFrameDqBits - kiOutputBits); //condition 2: VGOP bits constraint - const int32_t kiVGopBits = WELS_ROUND (pWelsSvcRc->dBitsPerFrame * VGOP_SIZE); + const int32_t kiVGopBits = WELS_DIV_ROUND(pWelsSvcRc->iBitsPerFrame * VGOP_SIZE, INT_MULTIPLY); int32_t iVGopBitsPred = 0; for (int32_t i = pWelsSvcRc->iFrameCodedInVGop + 1; i < VGOP_SIZE; i++) iVGopBitsPred += pTOverRc[pWelsSvcRc->iTlOfFrames[i]].iMinBitsTl; @@ -694,7 +701,7 @@ void RcVBufferCalculationSkip (sWelsEncCtx* pEncCtx) { pWelsSvcRc->iBufferFullnessSkip = 0; if (pEncCtx->iSkipFrameFlag == 1) { - pWelsSvcRc->iRemainingBits += WELS_ROUND (pWelsSvcRc->dBitsPerFrame); + pWelsSvcRc->iRemainingBits += WELS_DIV_ROUND(pWelsSvcRc->iBitsPerFrame, INT_MULTIPLY); pWelsSvcRc->iSkipFrameNum++; pWelsSvcRc->iSkipFrameInVGop++; } @@ -702,8 +709,8 @@ void RcVBufferCalculationSkip (sWelsEncCtx* pEncCtx) { void RcVBufferCalculationPadding (sWelsEncCtx* pEncCtx) { SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; - const int32_t kiOutputBits = WELS_ROUND 
(pWelsSvcRc->dBitsPerFrame); - const int32_t kiBufferThreshold = WELS_ROUND (PADDING_THRESHOLD * (-pWelsSvcRc->iBufferSizePadding)); + const int32_t kiOutputBits = WELS_DIV_ROUND(pWelsSvcRc->iBitsPerFrame, INT_MULTIPLY); + const int32_t kiBufferThreshold = WELS_DIV_ROUND(PADDING_THRESHOLD * (-pWelsSvcRc->iBufferSizePadding), INT_MULTIPLY); pWelsSvcRc->iBufferFullnessPadding += (pWelsSvcRc->iFrameDqBits - kiOutputBits); @@ -740,7 +747,7 @@ void RcUpdatePictureQpBits (sWelsEncCtx* pEncCtx, int32_t iCodedBits) { ++ pSOverRc; } if (iTotalMb > 0) - pWelsSvcRc->iAverageFrameQp = WELS_ROUND (1.0 * iTotalQp / iTotalMb); + pWelsSvcRc->iAverageFrameQp = WELS_DIV_ROUND(INT_MULTIPLY * iTotalQp, iTotalMb * INT_MULTIPLY); else pWelsSvcRc->iAverageFrameQp = pEncCtx->iGlobalQp; } else { @@ -752,12 +759,11 @@ void RcUpdatePictureQpBits (sWelsEncCtx* pEncCtx, int32_t iCodedBits) { void RcUpdateIntraComplexity (sWelsEncCtx* pEncCtx) { SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId]; - double iAlpha = 1.0 / (1 + pWelsSvcRc->iIdrNum); - if (iAlpha < 0.25) iAlpha = 0.25; + int32_t iAlpha = WELS_DIV_ROUND(INT_MULTIPLY, (1 + pWelsSvcRc->iIdrNum)); + if (iAlpha < (INT_MULTIPLY/4)) iAlpha = INT_MULTIPLY/4; - double dIntraCmplx = pWelsSvcRc->dQStep * pWelsSvcRc->iFrameDqBits; - dIntraCmplx = (1.0 - iAlpha) * pWelsSvcRc->iIntraComplexity + iAlpha * dIntraCmplx; - pWelsSvcRc->iIntraComplexity = WELS_ROUND (dIntraCmplx); + int64_t iIntraCmplx = pWelsSvcRc->iQStep * pWelsSvcRc->iFrameDqBits; + pWelsSvcRc->iIntraComplexity = WELS_DIV_ROUND(((INT_MULTIPLY - iAlpha) * pWelsSvcRc->iIntraComplexity + iAlpha * iIntraCmplx), INT_MULTIPLY); pWelsSvcRc->iIntraMbCount = pWelsSvcRc->iNumberMbFrame; pWelsSvcRc->iIdrNum++; @@ -771,16 +777,16 @@ void RcUpdateFrameComplexity (sWelsEncCtx* pEncCtx) { SRCTemporal* pTOverRc = &pWelsSvcRc->pTemporalOverRc[kiTl]; if (0 == pTOverRc->iPFrameNum) { - pTOverRc->dLinearCmplx = pWelsSvcRc->iFrameDqBits * pWelsSvcRc->dQStep; + pTOverRc->iLinearCmplx 
= ((int64_t)pWelsSvcRc->iFrameDqBits) * pWelsSvcRc->iQStep; } else { - pTOverRc->dLinearCmplx = LINEAR_MODEL_DECAY_FACTOR * pTOverRc->dLinearCmplx - + (1.0 - LINEAR_MODEL_DECAY_FACTOR) * (pWelsSvcRc->iFrameDqBits * pWelsSvcRc->dQStep); + pTOverRc->iLinearCmplx = WELS_DIV_ROUND64((LINEAR_MODEL_DECAY_FACTOR * pTOverRc->iLinearCmplx + + (INT_MULTIPLY - LINEAR_MODEL_DECAY_FACTOR) * (pWelsSvcRc->iFrameDqBits * pWelsSvcRc->iQStep)), INT_MULTIPLY); } - double iAlpha = 1.0 / (1 + pTOverRc->iPFrameNum); + int32_t iAlpha = WELS_DIV_ROUND(INT_MULTIPLY, (1 + pTOverRc->iPFrameNum)); if (iAlpha < SMOOTH_FACTOR_MIN_VALUE) iAlpha = SMOOTH_FACTOR_MIN_VALUE; - pTOverRc->iFrameCmplxMean = WELS_ROUND ((1.0 - iAlpha) * pTOverRc->iFrameCmplxMean + iAlpha * - pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity); + pTOverRc->iFrameCmplxMean = WELS_DIV_ROUND(((INT_MULTIPLY - iAlpha) * pTOverRc->iFrameCmplxMean + iAlpha * + pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity), INT_MULTIPLY); pTOverRc->iPFrameNum++; if (pTOverRc->iPFrameNum > 255) @@ -925,8 +931,8 @@ void WelsRcPictureInitDisable (void* pCtx) { pEncCtx->iGlobalQp = RcCalculateCascadingQp (pEncCtx, kiQp); if (pEncCtx->pSvcParam->bEnableAdaptiveQuant && (pEncCtx->eSliceType == P_SLICE)) { - pEncCtx->iGlobalQp = WELS_CLIP3 (WELS_ROUND (pEncCtx->iGlobalQp - - pEncCtx->pVaa->sAdaptiveQuantParam.dAverMotionTextureIndexToDeltaQp), GOM_MIN_QP_MODE, GOM_MAX_QP_MODE); + pEncCtx->iGlobalQp = WELS_CLIP3 ( (pEncCtx->iGlobalQp *INT_MULTIPLY - + pEncCtx->pVaa->sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp)/INT_MULTIPLY, GOM_MIN_QP_MODE, GOM_MAX_QP_MODE); } else { pEncCtx->iGlobalQp = WELS_CLIP3 (pEncCtx->iGlobalQp, 0, 51); } diff --git a/codec/encoder/core/src/slice_multi_threading.cpp b/codec/encoder/core/src/slice_multi_threading.cpp index 3bb590f1..8797bb10 100644 --- a/codec/encoder/core/src/slice_multi_threading.cpp +++ b/codec/encoder/core/src/slice_multi_threading.cpp @@ -120,9 +120,9 @@ void 
UpdateMbListNeighborParallel (SSliceCtx* pSliceCtx, } void CalcSliceComplexRatio (void* pRatio, SSliceCtx* pSliceCtx, uint32_t* pSliceConsume) { - float* pRatioList = (float*)pRatio; - float fAvI[MAX_SLICES_NUM]; - float fSumAv = .0f; + int32_t* pRatioList = (int32_t*)pRatio; + int32_t iAvI[MAX_SLICES_NUM]; + int32_t iSumAv = 0; uint32_t* pSliceTime = (uint32_t*)pSliceConsume; int32_t* pCountMbInSlice = (int32_t*)pSliceCtx->pCountMbNumInSlice; const int32_t kiSliceCount = pSliceCtx->iSliceNumInFrame; @@ -131,16 +131,16 @@ void CalcSliceComplexRatio (void* pRatio, SSliceCtx* pSliceCtx, uint32_t* pSlice WelsEmms(); while (iSliceIdx < kiSliceCount) { - fAvI[iSliceIdx] = 1.0f * pCountMbInSlice[iSliceIdx] / pSliceTime[iSliceIdx]; + iAvI[iSliceIdx] = WELS_DIV_ROUND(INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceTime[iSliceIdx]); MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), pSliceConsumeTime[%d]= %d us, slice_run= %d\n", iSliceIdx, pSliceTime[iSliceIdx], pCountMbInSlice[iSliceIdx]); - fSumAv += fAvI[iSliceIdx]; + iSumAv += iAvI[iSliceIdx]; ++ iSliceIdx; } while (-- iSliceIdx >= 0) { - pRatioList[iSliceIdx] = fAvI[iSliceIdx] / fSumAv; + pRatioList[iSliceIdx] = WELS_DIV_ROUND(INT_MULTIPLY * iAvI[iSliceIdx], iSumAv); } } @@ -200,7 +200,7 @@ void DynamicAdjustSlicing (sWelsEncCtx* pCtx, const int32_t kiCountNumMb = pSliceCtx->iMbNumInFrame; int32_t iMinimalMbNum = pSliceCtx->iMbWidth; // in theory we need only 1 SMB, here let it as one SMB row required int32_t iMaximalMbNum = 0; // dynamically assign later - float* pSliceComplexRatio = (float*)pComplexRatio; + int32_t* pSliceComplexRatio = (int32_t*)pComplexRatio; int32_t iMbNumLeft = kiCountNumMb; int32_t iRunLen[MAX_THREADS_NUM] = {0}; int32_t iSliceIdx = 0; @@ -236,11 +236,11 @@ void DynamicAdjustSlicing (sWelsEncCtx* pCtx, iSliceIdx = 0; while (iSliceIdx + 1 < kiCountSliceNum) { - int32_t iNumMbAssigning = WELS_ROUND (kiCountNumMb * pSliceComplexRatio[iSliceIdx]); + int32_t iNumMbAssigning = 
WELS_DIV_ROUND(kiCountNumMb * pSliceComplexRatio[iSliceIdx], INT_MULTIPLY); // GOM boundary aligned if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) { - iNumMbAssigning = WELS_ROUND (1.0f * iNumMbAssigning / iNumMbInEachGom) * iNumMbInEachGom; + iNumMbAssigning = iNumMbAssigning / iNumMbInEachGom * iNumMbInEachGom; } // make sure one GOM at least in each pSlice for safe @@ -259,14 +259,14 @@ void DynamicAdjustSlicing (sWelsEncCtx* pCtx, iRunLen[iSliceIdx] = iNumMbAssigning; MT_TRACE_LOG (pCtx, WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), uiSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d\n", - iSliceIdx, pSliceComplexRatio[iSliceIdx], pSliceCtx->pCountMbNumInSlice[iSliceIdx], iNumMbAssigning); + iSliceIdx, pSliceComplexRatio[iSliceIdx]* 1.0f / INT_MULTIPLY, pSliceCtx->pCountMbNumInSlice[iSliceIdx], iNumMbAssigning); ++ iSliceIdx; iMaximalMbNum = iMbNumLeft - (kiCountSliceNum - iSliceIdx - 1) * iMinimalMbNum; // get maximal num_mb in left parts } iRunLen[iSliceIdx] = iMbNumLeft; MT_TRACE_LOG (pCtx, WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d\n", - iSliceIdx, pSliceComplexRatio[iSliceIdx], pSliceCtx->pCountMbNumInSlice[iSliceIdx], iMbNumLeft); + iSliceIdx, pSliceComplexRatio[iSliceIdx]* 1.0f / INT_MULTIPLY, pSliceCtx->pCountMbNumInSlice[iSliceIdx], iMbNumLeft); if (DynamicAdjustSlicePEncCtxAll (pSliceCtx, iRunLen) == 0) { @@ -329,7 +329,7 @@ int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingPara && pPara->iMultipleThreadIdc >= kiSliceNum) { pSmt->pSliceConsumeTime[iIdx] = (uint32_t*)pMa->WelsMallocz (kiSliceNum * sizeof (uint32_t), "pSliceConsumeTime[]"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceConsumeTime[iIdx]), FreeMemorySvc (ppCtx)) - pSmt->pSliceComplexRatio[iIdx] = (float*)pMa->WelsMalloc (kiSliceNum * sizeof (float), "pSliceComplexRatio[]"); + pSmt->pSliceComplexRatio[iIdx] = (int32_t*)pMa->WelsMalloc (kiSliceNum * 
sizeof (int32_t), "pSliceComplexRatio[]"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceComplexRatio[iIdx]), FreeMemorySvc (ppCtx)) } else { pSmt->pSliceConsumeTime[iIdx] = NULL; diff --git a/codec/encoder/core/src/svc_enc_slice_segment.cpp b/codec/encoder/core/src/svc_enc_slice_segment.cpp index 465ec5eb..59cb41f1 100644 --- a/codec/encoder/core/src/svc_enc_slice_segment.cpp +++ b/codec/encoder/core/src/svc_enc_slice_segment.cpp @@ -276,7 +276,7 @@ void GomValidCheckSliceMbNum (const int32_t kiMbWidth, const int32_t kiMbHeight, while (uiSliceIdx + 1 < kuiSliceNum) { // GOM boundary aligned - int32_t iNumMbAssigning = WELS_ROUND (1.0f * kiMbNumPerSlice / iGomSize) * iGomSize; + int32_t iNumMbAssigning = WELS_DIV_ROUND(INT_MULTIPLY * kiMbNumPerSlice, iGomSize * INT_MULTIPLY) * iGomSize; // make sure one GOM at least in each slice for safe if (iNumMbAssigning < iMinimalMbNum) diff --git a/codec/encoder/core/src/wels_preprocess.cpp b/codec/encoder/core/src/wels_preprocess.cpp index d9f2f0ae..52193d0d 100644 --- a/codec/encoder/core/src/wels_preprocess.cpp +++ b/codec/encoder/core/src/wels_preprocess.cpp @@ -663,7 +663,7 @@ void CWelsPreProcess::BackgroundDetection (SVAAFrameInfo* pVaaInfo, SPicture* pC void CWelsPreProcess::AdaptiveQuantCalculation (SVAAFrameInfo* pVaaInfo, SPicture* pCurPicture, SPicture* pRefPicture) { pVaaInfo->sAdaptiveQuantParam.pCalcResult = & (pVaaInfo->sVaaCalcInfo); - pVaaInfo->sAdaptiveQuantParam.dAverMotionTextureIndexToDeltaQp = 0; + pVaaInfo->sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp = 0; { int32_t iMethodIdx = METHOD_ADAPTIVE_QUANT; diff --git a/codec/processing/interface/IWelsVP.h b/codec/processing/interface/IWelsVP.h index 96467485..c0feb269 100644 --- a/codec/processing/interface/IWelsVP.h +++ b/codec/processing/interface/IWelsVP.h @@ -209,7 +209,7 @@ typedef struct { SMotionTextureUnit* pMotionTextureUnit; signed char* pMotionTextureIndexToDeltaQp; - double dAverMotionTextureIndexToDeltaQp; + int 
iAverMotionTextureIndexToDeltaQp; // *AQ_QSTEP_INT_MULTIPLY } SAdaptiveQuantizationParam; typedef enum { diff --git a/codec/processing/src/adaptivequantization/AdaptiveQuantization.cpp b/codec/processing/src/adaptivequantization/AdaptiveQuantization.cpp index c45ee545..4100d45e 100644 --- a/codec/processing/src/adaptivequantization/AdaptiveQuantization.cpp +++ b/codec/processing/src/adaptivequantization/AdaptiveQuantization.cpp @@ -30,16 +30,16 @@ * */ #include "AdaptiveQuantization.h" - +#include "macros.h" WELSVP_NAMESPACE_BEGIN -#define AVERAGE_TIME_MOTION (0.3) //0.3046875 // 1/4 + 1/16 - 1/128 ~ 0.3 -#define AVERAGE_TIME_TEXTURE_QUALITYMODE (1.0) //0.5 // 1/2 -#define AVERAGE_TIME_TEXTURE_BITRATEMODE (0.875) //0.5 // 1/2 -#define MODEL_ALPHA (0.9910) //1.5 //1.1102 -#define MODEL_TIME (5.8185) //9.0 //5.9842 +#define AVERAGE_TIME_MOTION (3000) //0.3046875 // 1/4 + 1/16 - 1/128 ~ 0.3 *AQ_TIME_INT_MULTIPLY +#define AVERAGE_TIME_TEXTURE_QUALITYMODE (10000) //0.5 // 1/2 *AQ_TIME_INT_MULTIPLY +#define AVERAGE_TIME_TEXTURE_BITRATEMODE (8750) //0.5 // 1/2 *AQ_TIME_INT_MULTIPLY +#define MODEL_ALPHA (9910) //1.5 //1.1102 *AQ_TIME_INT_MULTIPLY +#define MODEL_TIME (58185) //9.0 //5.9842 *AQ_TIME_INT_MULTIPLY /////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -65,14 +65,14 @@ EResult CAdaptiveQuantization::Process (int32_t iType, SPixMap* pSrcPixMap, SPix SMotionTextureUnit* pMotionTexture = NULL; SVAACalcResult* pVaaCalcResults = NULL; - int8_t iMotionTextureIndexToDeltaQp = 0; + int32_t iMotionTextureIndexToDeltaQp = 0; int32_t iAverMotionTextureIndexToDeltaQp = 0; // double to uint32 - double dAverageMotionIndex = 0.0; // double to float - double dAverageTextureIndex = 0.0; + int64_t iAverageMotionIndex = 0; // double to float + int64_t iAverageTextureIndex = 0; - double dQStep = 0.0; - double dLumaMotionDeltaQp = 0; - double dLumaTextureDeltaQp = 0; + int64_t iQStep = 0; + int64_t iLumaMotionDeltaQp = 0; + 
int64_t iLumaTextureDeltaQp = 0; uint8_t* pRefFrameY = NULL, *pCurFrameY = NULL; int32_t iRefStride = 0, iCurStride = 0; @@ -88,8 +88,8 @@ EResult CAdaptiveQuantization::Process (int32_t iType, SPixMap* pSrcPixMap, SPix /////////////////////////////////////// motion ////////////////////////////////// // motion MB residual variance - dAverageMotionIndex = 0.0; - dAverageTextureIndex = 0.0; + iAverageMotionIndex = 0; + iAverageTextureIndex = 0; pMotionTexture = m_sAdaptiveQuantParam.pMotionTextureUnit; pVaaCalcResults = m_sAdaptiveQuantParam.pCalcResult; @@ -115,8 +115,8 @@ EResult CAdaptiveQuantization::Process (int32_t iType, SPixMap* pSrcPixMap, SPix uiSum = uiSum >> 8; pMotionTexture->uiTextureIndex = (iSQSum >> 8) - (uiSum * uiSum); - dAverageMotionIndex += pMotionTexture->uiMotionIndex; - dAverageTextureIndex += pMotionTexture->uiTextureIndex; + iAverageMotionIndex += pMotionTexture->uiMotionIndex; + iAverageTextureIndex += pMotionTexture->uiTextureIndex; pMotionTexture++; ++iMbIndex; pRefFrameTmp += MB_WIDTH_LUMA; @@ -131,8 +131,8 @@ EResult CAdaptiveQuantization::Process (int32_t iType, SPixMap* pSrcPixMap, SPix pCurFrameTmp = pCurFrameY; for (i = 0; i < iMbWidth; i++) { m_pfVar (pRefFrameTmp, iRefStride, pCurFrameTmp, iCurStride, pMotionTexture); - dAverageMotionIndex += pMotionTexture->uiMotionIndex; - dAverageTextureIndex += pMotionTexture->uiTextureIndex; + iAverageMotionIndex += pMotionTexture->uiMotionIndex; + iAverageTextureIndex += pMotionTexture->uiTextureIndex; pMotionTexture++; pRefFrameTmp += MB_WIDTH_LUMA; pCurFrameTmp += MB_WIDTH_LUMA; @@ -142,49 +142,51 @@ EResult CAdaptiveQuantization::Process (int32_t iType, SPixMap* pSrcPixMap, SPix pCurFrameY += (iCurStride) << 4; } } - dAverageMotionIndex = dAverageMotionIndex / iMbTotalNum; - dAverageTextureIndex = dAverageTextureIndex / iMbTotalNum; - if ((dAverageMotionIndex <= PESN) && (dAverageMotionIndex >= -PESN)) { - dAverageMotionIndex = 1.0; + iAverageMotionIndex = 
WELS_DIV_ROUND64(iAverageMotionIndex * AQ_INT_MULTIPLY,iMbTotalNum); + iAverageTextureIndex = WELS_DIV_ROUND64(iAverageTextureIndex * AQ_INT_MULTIPLY, iMbTotalNum); + if ((iAverageMotionIndex <= AQ_PESN) && (iAverageMotionIndex >= -AQ_PESN)) { + iAverageMotionIndex = AQ_INT_MULTIPLY; } - if ((dAverageTextureIndex <= PESN) && (dAverageTextureIndex >= -PESN)) { - dAverageTextureIndex = 1.0; + if ((iAverageTextureIndex <= AQ_PESN) && (iAverageTextureIndex >= -AQ_PESN)) { + iAverageTextureIndex = AQ_INT_MULTIPLY; } // motion mb residual map to QP // texture mb original map to QP iAverMotionTextureIndexToDeltaQp = 0; - dAverageMotionIndex = AVERAGE_TIME_MOTION * dAverageMotionIndex; + iAverageMotionIndex = WELS_DIV_ROUND64(AVERAGE_TIME_MOTION * iAverageMotionIndex, AQ_TIME_INT_MULTIPLY); if (m_sAdaptiveQuantParam.iAdaptiveQuantMode == AQ_QUALITY_MODE) { - dAverageTextureIndex = AVERAGE_TIME_TEXTURE_QUALITYMODE * dAverageTextureIndex; + iAverageTextureIndex = WELS_DIV_ROUND64(AVERAGE_TIME_TEXTURE_QUALITYMODE * iAverageTextureIndex, AQ_TIME_INT_MULTIPLY); } else { - dAverageTextureIndex = AVERAGE_TIME_TEXTURE_BITRATEMODE * dAverageTextureIndex; + iAverageTextureIndex = WELS_DIV_ROUND64(AVERAGE_TIME_TEXTURE_BITRATEMODE * iAverageTextureIndex, AQ_TIME_INT_MULTIPLY); } + int64_t iAQ_EPSN = -((int64_t)AQ_PESN*AQ_TIME_INT_MULTIPLY*AQ_QSTEP_INT_MULTIPLY/AQ_INT_MULTIPLY); pMotionTexture = m_sAdaptiveQuantParam.pMotionTextureUnit; for (j = 0; j < iMbHeight; j ++) { for (i = 0; i < iMbWidth; i++) { - double a = pMotionTexture->uiTextureIndex / dAverageTextureIndex; - dQStep = (a - 1) / (a + MODEL_ALPHA); - dLumaTextureDeltaQp = MODEL_TIME * dQStep;// range +- 6 + int64_t a = WELS_DIV_ROUND64((int64_t)(pMotionTexture->uiTextureIndex) *AQ_INT_MULTIPLY * AQ_TIME_INT_MULTIPLY, iAverageTextureIndex); + iQStep = WELS_DIV_ROUND64((a - AQ_TIME_INT_MULTIPLY) * AQ_QSTEP_INT_MULTIPLY, (a + MODEL_ALPHA)); + iLumaTextureDeltaQp = MODEL_TIME * iQStep;// range +- 6 - iMotionTextureIndexToDeltaQp 
= (int8_t)dLumaTextureDeltaQp; + iMotionTextureIndexToDeltaQp = ((int32_t)(iLumaTextureDeltaQp/(AQ_TIME_INT_MULTIPLY))); - a = pMotionTexture->uiMotionIndex / dAverageMotionIndex; - dQStep = (a - 1) / (a + MODEL_ALPHA); - dLumaMotionDeltaQp = MODEL_TIME * dQStep;// range +- 6 + a = WELS_DIV_ROUND64(((int64_t)pMotionTexture->uiMotionIndex)*AQ_INT_MULTIPLY * AQ_TIME_INT_MULTIPLY, iAverageMotionIndex); + iQStep = WELS_DIV_ROUND64((a - AQ_TIME_INT_MULTIPLY) * AQ_QSTEP_INT_MULTIPLY, (a + MODEL_ALPHA)); + iLumaMotionDeltaQp = MODEL_TIME * iQStep;// range +- 6 - if ((m_sAdaptiveQuantParam.iAdaptiveQuantMode == AQ_QUALITY_MODE && dLumaMotionDeltaQp < -PESN) + if ((m_sAdaptiveQuantParam.iAdaptiveQuantMode == AQ_QUALITY_MODE && iLumaMotionDeltaQp < iAQ_EPSN) || (m_sAdaptiveQuantParam.iAdaptiveQuantMode == AQ_BITRATE_MODE)) { - iMotionTextureIndexToDeltaQp += (int8_t)dLumaMotionDeltaQp; + iMotionTextureIndexToDeltaQp += ((int32_t)(iLumaMotionDeltaQp/(AQ_TIME_INT_MULTIPLY))); } - m_sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp[j * iMbWidth + i] = iMotionTextureIndexToDeltaQp; + m_sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp[j * iMbWidth + i] = (int8_t)(iMotionTextureIndexToDeltaQp/AQ_QSTEP_INT_MULTIPLY); iAverMotionTextureIndexToDeltaQp += iMotionTextureIndexToDeltaQp; pMotionTexture++; } } - m_sAdaptiveQuantParam.dAverMotionTextureIndexToDeltaQp = (1.0 * iAverMotionTextureIndexToDeltaQp) / iMbTotalNum; + + m_sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp = iAverMotionTextureIndexToDeltaQp / iMbTotalNum; eReturn = RET_SUCCESS; @@ -210,7 +212,7 @@ EResult CAdaptiveQuantization::Get (int32_t iType, void* pParam) { SAdaptiveQuantizationParam* sAdaptiveQuantParam = (SAdaptiveQuantizationParam*)pParam; - sAdaptiveQuantParam->dAverMotionTextureIndexToDeltaQp = m_sAdaptiveQuantParam.dAverMotionTextureIndexToDeltaQp; + sAdaptiveQuantParam->iAverMotionTextureIndexToDeltaQp = m_sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp; return RET_SUCCESS; } diff --git 
a/codec/processing/src/common/util.h b/codec/processing/src/common/util.h index 3bd2c746..fa12e5f6 100644 --- a/codec/processing/src/common/util.h +++ b/codec/processing/src/common/util.h @@ -58,6 +58,10 @@ WELSVP_NAMESPACE_BEGIN #define MAX_HEIGHT (2304)//MAX_FS_LEVEL51 (36864); MAX_FS_LEVEL51*256/4096 = 2304 #define MB_WIDTH_LUMA (16) #define PESN (1e-6) // desired float precision +#define AQ_INT_MULTIPLY 10000000 +#define AQ_TIME_INT_MULTIPLY 10000 +#define AQ_QSTEP_INT_MULTIPLY 100 +#define AQ_PESN 10 // (1e-6)*AQ_INT_MULTIPLY #define MB_TYPE_INTRA4x4 0x00000001 #define MB_TYPE_INTRA16x16 0x00000002 diff --git a/test/api/decode_encode_test.cpp b/test/api/decode_encode_test.cpp index f40de9e4..f574b171 100644 --- a/test/api/decode_encode_test.cpp +++ b/test/api/decode_encode_test.cpp @@ -104,8 +104,8 @@ TEST_P(DecodeEncodeTest, CompareOutput) { } static const DecodeEncodeFileParam kFileParamArray[] = { - {"res/test_vd_1d.264", "c73c1cc9b7dbab51f48cf41453073bb11337a445", 320, 192, 12.0f}, - {"res/test_vd_rc.264", "593ab31fdc67cbad7373abbf7d08daf2771fb229", 320, 192, 12.0f}, + {"res/test_vd_1d.264", "16b6379ce3711c57ea966e8da3e6b9815bf5ef4e", 320, 192, 12.0f}, + {"res/test_vd_rc.264", "03a93dbf5482b4a799d52d5fd373fde58a782e12", 320, 192, 12.0f}, }; INSTANTIATE_TEST_CASE_P(DecodeEncodeFile, DecodeEncodeTest, diff --git a/test/api/encoder_test.cpp b/test/api/encoder_test.cpp index 8d0cfdd2..1e6cdf34 100644 --- a/test/api/encoder_test.cpp +++ b/test/api/encoder_test.cpp @@ -73,31 +73,31 @@ TEST_P(EncoderOutputTest, CompareOutput) { static const EncodeFileParam kFileParamArray[] = { { "res/CiscoVT2people_320x192_12fps.yuv", - "5fa8c8551133b7d7586f498121028d0e05a28e1d", CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 1 + "86ee1314eabf5a6d6083c5260d36e2d908f36ea5", CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 1 }, { "res/CiscoVT2people_160x96_6fps.yuv", - "c619645a7d46f8fade40d2b0e5ae01adc2e5c3ff", CAMERA_VIDEO_REAL_TIME, 160, 96, 
6.0f, SM_SINGLE_SLICE, false, 1 + "20f3e5cbefe70e5a8793e60672fd70e136bc082a", CAMERA_VIDEO_REAL_TIME, 160, 96, 6.0f, SM_SINGLE_SLICE, false, 1 }, { "res/Static_152_100.yuv", - "68cde1b5f790213baab1a10d4a19a3618c138405", CAMERA_VIDEO_REAL_TIME, 152, 100, 6.0f, SM_SINGLE_SLICE, false, 1 + "035f2e5b35947ab43cad7c5ca4cd8a3b658306ba", CAMERA_VIDEO_REAL_TIME, 152, 100, 6.0f, SM_SINGLE_SLICE, false, 1 }, { "res/CiscoVT2people_320x192_12fps.yuv", - "d0d0a087451c2813e9b0fd61bc5b25a4e82519ac", CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_ROWMB_SLICE, false, 1 // One slice per MB row + "c8b759bcec7ffa048f1d3ded594b8815bed0aead", CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_ROWMB_SLICE, false, 1 // One slice per MB row }, { "res/CiscoVT2people_320x192_12fps.yuv", - "d3760e61e38af978d5b59232d8402448812d1540", CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, true, 1 + "6f49cc4e746522d7d91599c9a883072e838ae918", CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, true, 1 }, { "res/CiscoVT2people_320x192_12fps.yuv", - "a74ae382356098fb5cce216a97f2c0cef00a0a9d", CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 2 + "4da4526a763497cfd777e1a211c67df5e9d48b29", CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 2 }, { "res/Cisco_Absolute_Power_1280x720_30fps.yuv", - "76b26c32dd3b400d3dccee0e8a52581f5c2588bb", CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_DYN_SLICE, false, 1 + "6df1ece77c0de63cdf8ab52ccef3a7d139022717", CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_DYN_SLICE, false, 1 }, { "res/CiscoVT2people_320x192_12fps.yuv",