Compare commits
19 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() | 8fcef67c70 | ||
![]() | 241700cb96 | ||
![]() | 0f21b2b02e | ||
![]() | 9b6476e98a | ||
![]() | 350cafd69a | ||
![]() | 91331e1ba4 | ||
![]() | 1f770c488c | ||
![]() | 5a8f5e8cf1 | ||
![]() | 7d00e8bc42 | ||
![]() | 8980731be0 | ||
![]() | a43841d0e9 | ||
![]() | 9d89a6976e | ||
![]() | 687f9eff1b | ||
![]() | d97f4c5b68 | ||
![]() | 03add69386 | ||
![]() | ffe11835fc | ||
![]() | 44d8560698 | ||
![]() | 3a6ed92a35 | ||
![]() | 842b4f0243 | ||
2
Makefile
2
Makefile
@ -34,7 +34,7 @@ CCASFLAGS=$(CFLAGS)
|
||||
STATIC_LDFLAGS=-lstdc++
|
||||
|
||||
VERSION=1.6
|
||||
SHAREDLIBVERSION=0
|
||||
SHAREDLIBVERSION=3
|
||||
|
||||
ifeq (,$(wildcard $(SRC_PATH)gmp-api))
|
||||
HAVE_GMP_API=No
|
||||
|
@ -24,7 +24,7 @@ Encoder Features
|
||||
|
||||
Decoder Features
|
||||
----------------
|
||||
- Constrained Baseline Profile up to Level 5.2 (4096x2304)
|
||||
- Constrained Baseline Profile up to Level 5.2 (Max frame size is 36864 macro-blocks)
|
||||
- Arbitrary resolution, not constrained to multiples of 16x16
|
||||
- Single thread for all slices
|
||||
- Long Term Reference (LTR) frames
|
||||
|
8
RELEASES
8
RELEASES
@ -128,10 +128,10 @@ v1.6.0
|
||||
------
|
||||
http://ciscobinary.openh264.org/libopenh264-1.6.0-android19.so.bz2
|
||||
http://ciscobinary.openh264.org/libopenh264-1.6.0-ios.a.bz2
|
||||
http://ciscobinary.openh264.org/libopenh264-1.6.0-linux32.so.bz2
|
||||
http://ciscobinary.openh264.org/libopenh264-1.6.0-linux64.so.bz2
|
||||
http://ciscobinary.openh264.org/libopenh264-1.6.0-osx32.dylib.bz2
|
||||
http://ciscobinary.openh264.org/libopenh264-1.6.0-osx64.dylib.bz2
|
||||
http://ciscobinary.openh264.org/libopenh264-1.6.0-linux32.3.so.bz2
|
||||
http://ciscobinary.openh264.org/libopenh264-1.6.0-linux64.3.so.bz2
|
||||
http://ciscobinary.openh264.org/libopenh264-1.6.0-osx32.3.dylib.bz2
|
||||
http://ciscobinary.openh264.org/libopenh264-1.6.0-osx64.3.dylib.bz2
|
||||
http://ciscobinary.openh264.org/openh264-1.6.0-win32msvc.dll.bz2
|
||||
http://ciscobinary.openh264.org/openh264-1.6.0-win64msvc.dll.bz2
|
||||
|
||||
|
@ -1,6 +1,18 @@
|
||||
#for x86
|
||||
HAVE_AVX2 := true
|
||||
|
||||
ifneq ($(filter %86 x86_64, $(ARCH)),)
|
||||
include $(SRC_PATH)build/x86-common.mk
|
||||
ifeq ($(USE_ASM), Yes)
|
||||
ifeq ($(HAVE_AVX2), true)
|
||||
CFLAGS += -DHAVE_AVX2
|
||||
CXXFLAGS += -DHAVE_AVX2
|
||||
ASMFLAGS += -DHAVE_AVX2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
#for arm
|
||||
ifneq ($(filter-out arm64, $(filter arm%, $(ARCH))),)
|
||||
ifeq ($(USE_ASM), Yes)
|
||||
ASM_ARCH = arm
|
||||
@ -8,6 +20,8 @@ ASMFLAGS += -I$(SRC_PATH)codec/common/arm/
|
||||
CFLAGS += -DHAVE_NEON
|
||||
endif
|
||||
endif
|
||||
|
||||
#for arm64
|
||||
ifneq ($(filter arm64 aarch64, $(ARCH)),)
|
||||
ifeq ($(USE_ASM), Yes)
|
||||
ASM_ARCH = arm64
|
||||
|
@ -56,7 +56,6 @@
|
||||
#define WELS_CPU_SSE42 0x00000400 /* sse 4.2 */
|
||||
|
||||
/* CPU features application extensive */
|
||||
#define WELS_CPU_AVX 0x00000800 /* Advanced Vector eXtentions */
|
||||
#define WELS_CPU_FPU 0x00001000 /* x87-FPU on chip */
|
||||
#define WELS_CPU_HTT 0x00002000 /* Hyper-Threading Technology (HTT), Multi-threading enabled feature:
|
||||
physical processor package is capable of supporting more than one logic processor
|
||||
@ -67,7 +66,13 @@
|
||||
#define WELS_CPU_MOVBE 0x00008000 /* MOVBE instruction */
|
||||
#define WELS_CPU_AES 0x00010000 /* AES instruction extensions */
|
||||
#define WELS_CPU_FMA 0x00020000 /* AVX VEX FMA instruction sets */
|
||||
#define WELS_CPU_AVX 0x00000800 /* Advanced Vector eXtentions */
|
||||
|
||||
#ifdef HAVE_AVX2
|
||||
#define WELS_CPU_AVX2 0x00040000 /* AVX2 */
|
||||
#else
|
||||
#define WELS_CPU_AVX2 0x00000000 /* !AVX2 */
|
||||
#endif
|
||||
|
||||
#define WELS_CPU_CACHELINE_16 0x10000000 /* CacheLine Size 16 */
|
||||
#define WELS_CPU_CACHELINE_32 0x20000000 /* CacheLine Size 32 */
|
||||
|
@ -678,6 +678,7 @@ WELS_EXTERN WelsIDctRecI16x16Dc_sse2
|
||||
; AVX2 functions
|
||||
;***********************************************************************
|
||||
|
||||
%ifdef HAVE_AVX2
|
||||
; out=%1 pPixel1=%2 iStride1=%3 pPixel2=%4 iStride2=%5 wels_shufb0312_movzxw=%6 clobber=%7,%8
|
||||
%macro AVX2_LoadDiff16P 8
|
||||
vmovq x%1, [%2 ]
|
||||
@ -1011,3 +1012,5 @@ WELS_EXTERN WelsIDctT4Rec_avx2
|
||||
POP_XMM
|
||||
LOAD_5_PARA_POP
|
||||
ret
|
||||
%endif
|
||||
|
||||
|
@ -1504,6 +1504,7 @@ loop_get_satd_16x16_right:
|
||||
;
|
||||
;***********************************************************************
|
||||
|
||||
%ifdef HAVE_AVX2
|
||||
; out=%1 pSrcA=%2 pSrcB=%3 HSumSubDB1_256=%4 ymm_clobber=%5
|
||||
%macro AVX2_LoadDiffSatd16x1 5
|
||||
vbroadcasti128 %1, [%2]
|
||||
@ -1723,6 +1724,8 @@ WelsSampleSatd16x4N_avx2:
|
||||
%endif
|
||||
ret
|
||||
|
||||
%endif
|
||||
|
||||
;***********************************************************************
|
||||
;
|
||||
;Pixel_satd_wxh_avx2 END
|
||||
|
@ -48,8 +48,10 @@ extern "C" {
|
||||
#if defined(X86_ASM)
|
||||
void IdctResAddPred_mmx (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
|
||||
void IdctResAddPred_sse2 (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
|
||||
#if defined(HAVE_AVX2)
|
||||
void IdctResAddPred_avx2 (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
|
||||
void IdctFourResAddPred_avx2 (uint8_t* pPred, int32_t iStride, int16_t* pRs, const int8_t* pNzc);
|
||||
#endif
|
||||
#endif//X86_ASM
|
||||
|
||||
#if defined(HAVE_NEON)
|
||||
|
@ -1377,12 +1377,7 @@ int32_t ParsePps (PWelsDecoderContext pCtx, PPps pPpsList, PBitStringAux pBsAux,
|
||||
pPps->bWeightedPredFlag = !!uiCode;
|
||||
WELS_READ_VERIFY (BsGetBits (pBsAux, 2, &uiCode)); //weighted_bipred_idc
|
||||
pPps->uiWeightedBipredIdc = uiCode;
|
||||
if (pPps->uiWeightedBipredIdc != 0) {
|
||||
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
|
||||
"ParsePps(): weighted_bipred_idc (%d) not supported.\n",
|
||||
pPps->uiWeightedBipredIdc);
|
||||
return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_WP);
|
||||
}
|
||||
// weighted_bipred_idc > 0 NOT supported now, but no impact when we ignore it
|
||||
|
||||
WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode)); //pic_init_qp_minus26
|
||||
pPps->iPicInitQp = PIC_INIT_QP_OFFSET + iCode;
|
||||
|
@ -535,6 +535,10 @@ int32_t ParseIntra8x8Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail
|
||||
pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
|
||||
pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
|
||||
pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
|
||||
|
||||
if (pCtx->pSps->uiChromaFormatIdc == 0)
|
||||
return ERR_NONE;
|
||||
|
||||
if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
|
||||
WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
|
||||
if (iCode > MAX_PRED_MODE_ID_CHROMA) {
|
||||
|
@ -1005,11 +1005,14 @@ void InitPredFunc (PWelsDecoderContext pCtx, uint32_t uiCpuFlag) {
|
||||
pCtx->pGetIChromaPredFunc[C_PRED_DC_T] = WelsDecoderIChromaPredDcTop_sse2;
|
||||
pCtx->pGetI4x4LumaPredFunc[I4_PRED_H] = WelsDecoderI4x4LumaPredH_sse2;
|
||||
}
|
||||
#if defined(HAVE_AVX2)
|
||||
if (uiCpuFlag & WELS_CPU_AVX2) {
|
||||
pCtx->pIdctResAddPredFunc = IdctResAddPred_avx2;
|
||||
pCtx->pIdctFourResAddPredFunc = IdctFourResAddPred_avx2;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
//reset decoder number related statistics info
|
||||
|
@ -48,7 +48,7 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
|
||||
|
||||
int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
|
||||
int32_t iLeftTopType, iRightTopType, iTopType, iLeftType;
|
||||
int32_t iCurX, iCurY, iCurXy, iLeftXy, iTopXy, iLeftTopXy, iRightTopXy = 0;
|
||||
int32_t iCurX, iCurY, iCurXy, iLeftXy, iTopXy = 0, iLeftTopXy = 0, iRightTopXy = 0;
|
||||
|
||||
int8_t iLeftRef;
|
||||
int8_t iTopRef;
|
||||
|
@ -216,7 +216,7 @@ typedef struct TagWelsSvcCodingParam: SEncParamExt {
|
||||
bDeblockingParallelFlag = false;// deblocking filter parallelization control flag
|
||||
|
||||
iDecompStages = 0; // GOP size dependency, unknown here and be revised later
|
||||
iBitsVaryPercentage = 0;
|
||||
iBitsVaryPercentage = 10;
|
||||
}
|
||||
|
||||
int32_t ParamBaseTranscode (const SEncParamBase& pCodingParam) {
|
||||
|
@ -119,7 +119,7 @@ enum {
|
||||
#define FRAME_iTargetBits_VARY_RANGE 50 // *INT_MULTIPLY
|
||||
//R-Q Model
|
||||
#define LINEAR_MODEL_DECAY_FACTOR 80 // *INT_MULTIPLY
|
||||
#define FRAME_CMPLX_RATIO_RANGE 10 // *INT_MULTIPLY
|
||||
#define FRAME_CMPLX_RATIO_RANGE 20 // *INT_MULTIPLY
|
||||
#define SMOOTH_FACTOR_MIN_VALUE 2 // *INT_MULTIPLY
|
||||
//#define VGOP_BITS_MIN_RATIO 0.8
|
||||
//skip and padding
|
||||
@ -174,6 +174,7 @@ int32_t iCurrentBitsLevel;//0:normal; 1:limited; 2:exceeded.
|
||||
int32_t iIdrNum;
|
||||
int64_t iIntraComplexity; //255*255(MaxMbSAD)*36864(MaxFS) make the highest bit of 32-bit integer 1
|
||||
int32_t iIntraMbCount;
|
||||
int64_t iIntraComplxMean;
|
||||
|
||||
int8_t iTlOfFrames[VGOP_SIZE];
|
||||
int32_t iRemainingWeights;
|
||||
@ -184,6 +185,7 @@ int32_t* pGomForegroundBlockNum;
|
||||
int32_t* pCurrentFrameGomSad;
|
||||
int32_t* pGomCost;
|
||||
|
||||
int32_t bEnableGomQp;
|
||||
int32_t iAverageFrameQp;
|
||||
int32_t iMinFrameQp;
|
||||
int32_t iMaxFrameQp;
|
||||
|
@ -3704,7 +3704,6 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo* pFbi, const SSour
|
||||
#ifdef LONG_TERM_REF_DUMP
|
||||
DumpRef (pCtx);
|
||||
#endif
|
||||
|
||||
if (pSvcParam->iRCMode != RC_OFF_MODE)
|
||||
pCtx->pVpp->AnalyzePictureComplexity (pCtx, pCtx->pEncPic, ((pCtx->eSliceType == P_SLICE)
|
||||
&& (pCtx->iNumRef0 > 0)) ? pCtx->pRefList0[0] : NULL,
|
||||
@ -3712,7 +3711,6 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo* pFbi, const SSour
|
||||
WelsUpdateRefSyntax (pCtx, pParamInternal->iPOC,
|
||||
eFrameType); //get reordering syntax used for writing slice header and transmit to encoder.
|
||||
PrefetchReferencePicture (pCtx, eFrameType); // update reference picture for current pDq layer
|
||||
|
||||
pCtx->pFuncList->pfRc.pfWelsRcPictureInit (pCtx, pFbi->uiTimeStamp);
|
||||
PreprocessSliceCoding (pCtx); // MUST be called after pfWelsRcPictureInit() and WelsInitCurrentLayer()
|
||||
|
||||
|
@ -163,7 +163,7 @@ void RcInitSequenceParameter (sWelsEncCtx* pEncCtx) {
|
||||
|
||||
pWelsSvcRc->iSkipFrameNum = 0;
|
||||
pWelsSvcRc->iGomSize = (pWelsSvcRc->iNumberMbFrame + pWelsSvcRc->iNumberMbGom - 1) / pWelsSvcRc->iNumberMbGom;
|
||||
|
||||
pWelsSvcRc->bEnableGomQp = true;
|
||||
|
||||
RcInitLayerMemory (pWelsSvcRc, pEncCtx->pMemAlign, 1 + pEncCtx->pSvcParam->sDependencyLayers[j].iHighestTemporalId);
|
||||
|
||||
@ -281,7 +281,7 @@ void RcInitRefreshParameter (sWelsEncCtx* pEncCtx) {
|
||||
//I frame R-Q Model
|
||||
pWelsSvcRc->iIntraComplexity = 0;
|
||||
pWelsSvcRc->iIntraMbCount = 0;
|
||||
|
||||
pWelsSvcRc->iIntraComplxMean = 0;
|
||||
//P frame R-Q Model
|
||||
for (i = 0; i <= kiHighestTid; i++) {
|
||||
pTOverRc[i].iPFrameNum = 0;
|
||||
@ -399,7 +399,6 @@ void RcInitIdrQp (sWelsEncCtx* pEncCtx) {
|
||||
pDLayerParam->iVideoHeight);
|
||||
else
|
||||
dBpp = 0.1;
|
||||
|
||||
//Area*2
|
||||
if (pDLayerParam->iVideoWidth * pDLayerParam->iVideoHeight <= 28800) // 90p video:160*90
|
||||
iBppIndex = 0;
|
||||
@ -420,6 +419,10 @@ void RcInitIdrQp (sWelsEncCtx* pEncCtx) {
|
||||
pEncCtx->iGlobalQp = pWelsSvcRc->iInitialQp;
|
||||
pWelsSvcRc->iQStep = RcConvertQp2QStep (pEncCtx->iGlobalQp);
|
||||
pWelsSvcRc->iLastCalculatedQScale = pEncCtx->iGlobalQp;
|
||||
pWelsSvcRc->iMinFrameQp = WELS_CLIP3 (pEncCtx->iGlobalQp - DELTA_QP_BGD_THD, pEncCtx->pSvcParam->iMinQp,
|
||||
pEncCtx->pSvcParam->iMaxQp);
|
||||
pWelsSvcRc->iMaxFrameQp = WELS_CLIP3 (pEncCtx->iGlobalQp + DELTA_QP_BGD_THD, pEncCtx->pSvcParam->iMinQp,
|
||||
pEncCtx->pSvcParam->iMaxQp);
|
||||
}
|
||||
|
||||
void RcCalculateIdrQp (sWelsEncCtx* pEncCtx) {
|
||||
@ -429,12 +432,21 @@ void RcCalculateIdrQp (sWelsEncCtx* pEncCtx) {
|
||||
pWelsSvcRc->iIntraComplexity = pWelsSvcRc->iIntraComplexity * pWelsSvcRc->iNumberMbFrame /
|
||||
pWelsSvcRc->iIntraMbCount;
|
||||
}
|
||||
pWelsSvcRc->iInitialQp = RcConvertQStep2Qp (WELS_DIV_ROUND (pWelsSvcRc->iIntraComplexity,
|
||||
pWelsSvcRc->iTargetBits));
|
||||
|
||||
int64_t iCmplxRatio = WELS_DIV_ROUND64 (pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity * INT_MULTIPLY,
|
||||
pWelsSvcRc->iIntraComplxMean);
|
||||
iCmplxRatio = WELS_CLIP3 (iCmplxRatio, INT_MULTIPLY - FRAME_CMPLX_RATIO_RANGE, INT_MULTIPLY + FRAME_CMPLX_RATIO_RANGE);
|
||||
pWelsSvcRc->iQStep = WELS_DIV_ROUND ((pWelsSvcRc->iIntraComplexity * iCmplxRatio),
|
||||
(pWelsSvcRc->iTargetBits * INT_MULTIPLY));
|
||||
pWelsSvcRc->iInitialQp = RcConvertQStep2Qp (pWelsSvcRc->iQStep);
|
||||
pWelsSvcRc->iInitialQp = WELS_CLIP3 (pWelsSvcRc->iInitialQp, pEncCtx->pSvcParam->iMinQp, pEncCtx->pSvcParam->iMaxQp);
|
||||
pEncCtx->iGlobalQp = pWelsSvcRc->iInitialQp;
|
||||
pWelsSvcRc->iQStep = RcConvertQp2QStep (pEncCtx->iGlobalQp);
|
||||
pWelsSvcRc->iLastCalculatedQScale = pEncCtx->iGlobalQp;
|
||||
pWelsSvcRc->iMinFrameQp = WELS_CLIP3 (pEncCtx->iGlobalQp - DELTA_QP_BGD_THD, pEncCtx->pSvcParam->iMinQp,
|
||||
pEncCtx->pSvcParam->iMaxQp);
|
||||
pWelsSvcRc->iMaxFrameQp = WELS_CLIP3 (pEncCtx->iGlobalQp + DELTA_QP_BGD_THD, pEncCtx->pSvcParam->iMinQp,
|
||||
pEncCtx->pSvcParam->iMaxQp);
|
||||
}
|
||||
|
||||
|
||||
@ -443,36 +455,22 @@ void RcCalculatePictureQp (sWelsEncCtx* pEncCtx) {
|
||||
int32_t iTl = pEncCtx->uiTemporalId;
|
||||
SRCTemporal* pTOverRc = &pWelsSvcRc->pTemporalOverRc[iTl];
|
||||
int32_t iLumaQp = 0;
|
||||
|
||||
int32_t iDeltaQpTemporal = 0;
|
||||
if (0 == pTOverRc->iPFrameNum) {
|
||||
iLumaQp = pWelsSvcRc->iInitialQp;
|
||||
} else if (pWelsSvcRc->iCurrentBitsLevel == BITS_EXCEEDED) {
|
||||
iLumaQp = MAX_LOW_BR_QP;
|
||||
iLumaQp = pWelsSvcRc->iLastCalculatedQScale + DELTA_QP_BGD_THD;
|
||||
//limit QP
|
||||
int32_t iLastIdxCodecInVGop = pWelsSvcRc->iFrameCodedInVGop - 1;
|
||||
if (iLastIdxCodecInVGop < 0)
|
||||
iLastIdxCodecInVGop += VGOP_SIZE;
|
||||
int32_t iTlLast = pWelsSvcRc->iTlOfFrames[iLastIdxCodecInVGop];
|
||||
int32_t iDeltaQpTemporal = iTl - iTlLast;
|
||||
iDeltaQpTemporal = iTl - iTlLast;
|
||||
if (0 == iTlLast && iTl > 0)
|
||||
iDeltaQpTemporal += 3;
|
||||
iDeltaQpTemporal += 1;
|
||||
else if (0 == iTl && iTlLast > 0)
|
||||
iDeltaQpTemporal -= 3;
|
||||
iDeltaQpTemporal -= 1;
|
||||
|
||||
iLumaQp = WELS_CLIP3 (iLumaQp,
|
||||
pWelsSvcRc->iLastCalculatedQScale - pWelsSvcRc->iFrameDeltaQpLower + iDeltaQpTemporal,
|
||||
pWelsSvcRc->iLastCalculatedQScale + pWelsSvcRc->iFrameDeltaQpUpper + iDeltaQpTemporal);
|
||||
iLumaQp = WELS_CLIP3 (iLumaQp, pTOverRc->iMinQp, pTOverRc->iMaxQp);
|
||||
|
||||
if (pEncCtx->pSvcParam->bEnableAdaptiveQuant) {
|
||||
iLumaQp = WELS_CLIP3 ((iLumaQp * INT_MULTIPLY - pEncCtx->pVaa->sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp) /
|
||||
INT_MULTIPLY, pTOverRc->iMinQp, pTOverRc->iMaxQp);
|
||||
}
|
||||
pWelsSvcRc->iQStep = RcConvertQp2QStep (iLumaQp);
|
||||
pWelsSvcRc->iLastCalculatedQScale = iLumaQp;
|
||||
pEncCtx->iGlobalQp = iLumaQp;
|
||||
|
||||
return;
|
||||
} else {
|
||||
int64_t iCmplxRatio = WELS_DIV_ROUND64 (pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity * INT_MULTIPLY,
|
||||
pTOverRc->iFrameCmplxMean);
|
||||
@ -480,7 +478,9 @@ void RcCalculatePictureQp (sWelsEncCtx* pEncCtx) {
|
||||
|
||||
pWelsSvcRc->iQStep = WELS_DIV_ROUND ((pTOverRc->iLinearCmplx * iCmplxRatio), (pWelsSvcRc->iTargetBits * INT_MULTIPLY));
|
||||
iLumaQp = RcConvertQStep2Qp (pWelsSvcRc->iQStep);
|
||||
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
|
||||
"iCmplxRatio = %d,frameComplexity = %lld,iFrameCmplxMean = %d,iQStep = %d,iLumaQp = %d", (int)iCmplxRatio,
|
||||
pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity, pTOverRc->iFrameCmplxMean, pWelsSvcRc->iQStep, iLumaQp);
|
||||
//limit QP
|
||||
int32_t iLastIdxCodecInVGop = pWelsSvcRc->iFrameCodedInVGop - 1;
|
||||
if (iLastIdxCodecInVGop < 0)
|
||||
@ -488,21 +488,22 @@ void RcCalculatePictureQp (sWelsEncCtx* pEncCtx) {
|
||||
int32_t iTlLast = pWelsSvcRc->iTlOfFrames[iLastIdxCodecInVGop];
|
||||
int32_t iDeltaQpTemporal = iTl - iTlLast;
|
||||
if (0 == iTlLast && iTl > 0)
|
||||
iDeltaQpTemporal += 3;
|
||||
iDeltaQpTemporal += 1;
|
||||
else if (0 == iTl && iTlLast > 0)
|
||||
iDeltaQpTemporal -= 3;
|
||||
|
||||
iLumaQp = WELS_CLIP3 (iLumaQp,
|
||||
pWelsSvcRc->iLastCalculatedQScale - pWelsSvcRc->iFrameDeltaQpLower + iDeltaQpTemporal,
|
||||
pWelsSvcRc->iLastCalculatedQScale + pWelsSvcRc->iFrameDeltaQpUpper + iDeltaQpTemporal);
|
||||
iDeltaQpTemporal -= 1;
|
||||
}
|
||||
pWelsSvcRc->iMinFrameQp = WELS_CLIP3 (pWelsSvcRc->iLastCalculatedQScale - pWelsSvcRc->iFrameDeltaQpLower +
|
||||
iDeltaQpTemporal, pTOverRc->iMinQp, pTOverRc->iMaxQp) ;
|
||||
pWelsSvcRc->iMaxFrameQp = WELS_CLIP3 (pWelsSvcRc->iLastCalculatedQScale + pWelsSvcRc->iFrameDeltaQpUpper +
|
||||
iDeltaQpTemporal, pTOverRc->iMinQp, pTOverRc->iMaxQp);
|
||||
|
||||
iLumaQp = WELS_CLIP3 (iLumaQp, pWelsSvcRc->iMinFrameQp, pWelsSvcRc->iMaxFrameQp);
|
||||
|
||||
iLumaQp = WELS_CLIP3 (iLumaQp, pTOverRc->iMinQp, pTOverRc->iMaxQp);
|
||||
if (pEncCtx->pSvcParam->bEnableAdaptiveQuant) {
|
||||
|
||||
iLumaQp = WELS_DIV_ROUND (iLumaQp * INT_MULTIPLY - pEncCtx->pVaa->sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp,
|
||||
INT_MULTIPLY);
|
||||
iLumaQp = WELS_CLIP3 (iLumaQp, pTOverRc->iMinQp, pTOverRc->iMaxQp);
|
||||
iLumaQp = WELS_CLIP3 (iLumaQp, pWelsSvcRc->iMinFrameQp, pWelsSvcRc->iMaxFrameQp);
|
||||
}
|
||||
pWelsSvcRc->iQStep = RcConvertQp2QStep (iLumaQp);
|
||||
pWelsSvcRc->iLastCalculatedQScale = iLumaQp;
|
||||
@ -554,6 +555,57 @@ void RcDecideTargetBits (sWelsEncCtx* pEncCtx) {
|
||||
pWelsSvcRc->iRemainingWeights -= pTOverRc->iTlayerWeight;
|
||||
}
|
||||
|
||||
void RcDecideTargetBitsTimestamp (sWelsEncCtx* pEncCtx) {
|
||||
//decide one frame bits allocated
|
||||
SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId];
|
||||
SSpatialLayerConfig* pDLayerParam = &pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId];
|
||||
int32_t iTl = pEncCtx->uiTemporalId;
|
||||
SRCTemporal* pTOverRc = &pWelsSvcRc->pTemporalOverRc[iTl];
|
||||
|
||||
pWelsSvcRc->iCurrentBitsLevel = BITS_NORMAL;
|
||||
if (pEncCtx->eSliceType == I_SLICE) {
|
||||
int32_t iBufferTh = static_cast<int32_t> (pWelsSvcRc->iBufferSizeSkip - pWelsSvcRc->iBufferFullnessSkip);
|
||||
if (iBufferTh <= 0) {
|
||||
pWelsSvcRc->iCurrentBitsLevel = BITS_EXCEEDED;
|
||||
pWelsSvcRc->iTargetBits = pTOverRc->iMinBitsTl;
|
||||
} else {
|
||||
int32_t iMaxTh = iBufferTh * 3 / 4;
|
||||
int32_t iMinTh = static_cast<int32_t> (iBufferTh * 2 / pDLayerParam->fFrameRate);
|
||||
pWelsSvcRc->iTargetBits = static_cast<int32_t> (((double) (pDLayerParam->iSpatialBitrate) / (double) (
|
||||
pDLayerParam->fFrameRate) * IDR_BITRATE_RATIO));
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
|
||||
"iMaxTh = %d,iMinTh = %d,pWelsSvcRc->iTargetBits = %d,pWelsSvcRc->iBufferSizeSkip = %d, pWelsSvcRc->iBufferFullnessSkip= %"
|
||||
PRId64 ,
|
||||
iMaxTh, iMinTh, pWelsSvcRc->iTargetBits, pWelsSvcRc->iBufferSizeSkip, pWelsSvcRc->iBufferFullnessSkip);
|
||||
pWelsSvcRc->iTargetBits = WELS_CLIP3 (pWelsSvcRc->iTargetBits, iMinTh, iMaxTh);
|
||||
}
|
||||
|
||||
} else {
|
||||
int32_t iBufferTh = static_cast<int32_t> (pWelsSvcRc->iBufferSizeSkip - pWelsSvcRc->iBufferFullnessSkip);
|
||||
if (iBufferTh <= 0) {
|
||||
pWelsSvcRc->iCurrentBitsLevel = BITS_EXCEEDED;
|
||||
pWelsSvcRc->iTargetBits = pTOverRc->iMinBitsTl;
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
|
||||
"iMaxTh = %d,pWelsSvcRc->iTargetBits = %d,pWelsSvcRc->iBufferSizeSkip = %d, pWelsSvcRc->iBufferFullnessSkip= %" PRId64,
|
||||
iBufferTh, pWelsSvcRc->iTargetBits, pWelsSvcRc->iBufferSizeSkip, pWelsSvcRc->iBufferFullnessSkip);
|
||||
} else {
|
||||
|
||||
SSpatialLayerInternal* pDLayerParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId];
|
||||
const int32_t kiGopSize = (1 << pDLayerParamInternal->iDecompositionStages);
|
||||
int32_t iAverageFrameSize = (int32_t) ((double) (pDLayerParam->iSpatialBitrate) / (double) (pDLayerParam->fFrameRate));
|
||||
const int32_t kiGopBits = iAverageFrameSize * kiGopSize;
|
||||
pWelsSvcRc->iTargetBits = WELS_DIV_ROUND (pTOverRc->iTlayerWeight * kiGopBits, INT_MULTIPLY * 10 * 2);
|
||||
|
||||
int32_t iMaxTh = iBufferTh / 2;
|
||||
int32_t iMinTh = (int32_t) (iBufferTh * 2 / pDLayerParam->fFrameRate);
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
|
||||
"iMaxTh = %d,iMinTh = %d,pWelsSvcRc->iTargetBits = %d,pWelsSvcRc->iBufferSizeSkip = %d, pWelsSvcRc->iBufferFullnessSkip= % "
|
||||
PRId64,
|
||||
iMaxTh, iMinTh, pWelsSvcRc->iTargetBits, pWelsSvcRc->iBufferSizeSkip, pWelsSvcRc->iBufferFullnessSkip);
|
||||
pWelsSvcRc->iTargetBits = WELS_CLIP3 (pWelsSvcRc->iTargetBits, iMinTh, iMaxTh);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RcInitGomParameters (sWelsEncCtx* pEncCtx) {
|
||||
SSlice* pSliceInLayer = pEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer;
|
||||
@ -563,8 +615,6 @@ void RcInitGomParameters (sWelsEncCtx* pEncCtx) {
|
||||
const int32_t kiGlobalQp = pEncCtx->iGlobalQp;
|
||||
|
||||
pWelsSvcRc->iAverageFrameQp = 0;
|
||||
pWelsSvcRc->iMinFrameQp = 51;
|
||||
pWelsSvcRc->iMaxFrameQp = 0;
|
||||
for (int32_t i = 0; i < kiSliceNum; ++i) {
|
||||
pSOverRc = &pSliceInLayer[i].sSlicingOverRc;
|
||||
pSOverRc->iComplexityIndexSlice = 0;
|
||||
@ -581,11 +631,11 @@ void RcCalculateMbQp (sWelsEncCtx* pEncCtx, SMB* pCurMb, const int32_t kiSliceId
|
||||
|
||||
int32_t iLumaQp = pSOverRc->iCalculatedQpSlice;
|
||||
SDqLayer* pCurLayer = pEncCtx->pCurDqLayer;
|
||||
SRCTemporal* pTOverRc = &pWelsSvcRc->pTemporalOverRc[pEncCtx->uiTemporalId];
|
||||
const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset;
|
||||
if (pEncCtx->pSvcParam->bEnableAdaptiveQuant) {
|
||||
iLumaQp = (int8_t)WELS_CLIP3 (iLumaQp +
|
||||
pEncCtx->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp[pCurMb->iMbXY], pTOverRc->iMinQp, pTOverRc->iMaxQp);
|
||||
pEncCtx->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp[pCurMb->iMbXY], pWelsSvcRc->iMinFrameQp,
|
||||
pWelsSvcRc->iMaxFrameQp);
|
||||
}
|
||||
pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (iLumaQp + kuiChromaQpIndexOffset)];
|
||||
pCurMb->uiLumaQp = iLumaQp;
|
||||
@ -637,15 +687,15 @@ void RcGomTargetBits (sWelsEncCtx* pEncCtx, const int32_t kiSliceId) {
|
||||
} else {
|
||||
pWelsSvcRc_Base = RcJudgeBaseUsability (pEncCtx);
|
||||
pWelsSvcRc_Base = (pWelsSvcRc_Base) ? pWelsSvcRc_Base : pWelsSvcRc;
|
||||
for (i = kiComplexityIndex; i <= iLastGomIndex; i++) {
|
||||
for (i = kiComplexityIndex + 1; i <= iLastGomIndex; i++) {
|
||||
iSumSad += pWelsSvcRc_Base->pCurrentFrameGomSad[i];
|
||||
}
|
||||
|
||||
if (0 == iSumSad)
|
||||
iAllocateBits = WELS_DIV_ROUND (iLeftBits, (iLastGomIndex - kiComplexityIndex));
|
||||
else
|
||||
iAllocateBits = WELS_DIV_ROUND ((int64_t)iLeftBits * pWelsSvcRc_Base->pCurrentFrameGomSad[kiComplexityIndex + 1],
|
||||
iSumSad);
|
||||
|
||||
}
|
||||
pSOverRc->iGomTargetBits = iAllocateBits;
|
||||
}
|
||||
@ -660,7 +710,6 @@ void RcCalculateGomQp (sWelsEncCtx* pEncCtx, SMB* pCurMb, int32_t iSliceId) {
|
||||
|
||||
int64_t iLeftBits = pSOverRc->iTargetBitsSlice - pSOverRc->iFrameBitsSlice;
|
||||
int64_t iTargetLeftBits = iLeftBits + pSOverRc->iGomBitsSlice - pSOverRc->iGomTargetBits;
|
||||
|
||||
if ((iLeftBits <= 0) || (iTargetLeftBits <= 0)) {
|
||||
pSOverRc->iCalculatedQpSlice += 2;
|
||||
} else {
|
||||
@ -675,8 +724,9 @@ void RcCalculateGomQp (sWelsEncCtx* pEncCtx, SMB* pCurMb, int32_t iSliceId) {
|
||||
else if (iBitsRatio > 11900) //2^(1.5/6)*10000
|
||||
pSOverRc->iCalculatedQpSlice -= 2;
|
||||
}
|
||||
pSOverRc->iCalculatedQpSlice = WELS_CLIP3 (pSOverRc->iCalculatedQpSlice, pWelsSvcRc->iMinQp, pWelsSvcRc->iMaxQp);
|
||||
|
||||
pSOverRc->iCalculatedQpSlice = WELS_CLIP3 (pSOverRc->iCalculatedQpSlice, pWelsSvcRc->iMinFrameQp,
|
||||
pWelsSvcRc->iMaxFrameQp);
|
||||
// WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,"iCalculatedQpSlice =%d,iBitsRatio = %d\n",pSOverRc->iCalculatedQpSlice,iBitsRatio);
|
||||
pSOverRc->iGomBitsSlice = 0;
|
||||
|
||||
}
|
||||
@ -707,6 +757,9 @@ void RcVBufferCalculationSkip (sWelsEncCtx* pEncCtx) {
|
||||
|| (dIncPercent > pWelsSvcRc->iRcVaryPercentage)) {
|
||||
pWelsSvcRc->bSkipFlag = true;
|
||||
}
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
|
||||
"[Rc] VBV_Skip,dIncPercent = %f,iRcVaryPercentage = %d,pWelsSvcRc->bSkipFlag = %d", dIncPercent,
|
||||
pWelsSvcRc->iRcVaryPercentage, pWelsSvcRc->bSkipFlag);
|
||||
}
|
||||
void CheckFrameSkipBasedMaxbr (sWelsEncCtx* pEncCtx, const long long uiTimeStamp, int32_t iDidIdx) {
|
||||
SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[iDidIdx];
|
||||
@ -769,7 +822,7 @@ void CheckFrameSkipBasedMaxbr (sWelsEncCtx* pEncCtx, const long long uiTimeStamp
|
||||
}
|
||||
}
|
||||
|
||||
bool WelsRcCheckFrameStatus (sWelsEncCtx* pEncCtx, long long uiTimeStamp, int32_t iSpatialNum,int32_t iCurDid) {
|
||||
bool WelsRcCheckFrameStatus (sWelsEncCtx* pEncCtx, long long uiTimeStamp, int32_t iSpatialNum, int32_t iCurDid) {
|
||||
|
||||
bool bSkipMustFlag = false;
|
||||
|
||||
@ -787,7 +840,7 @@ bool WelsRcCheckFrameStatus (sWelsEncCtx* pEncCtx, long long uiTimeStamp, int32_
|
||||
}
|
||||
//check max_br skip
|
||||
if (pEncCtx->pFuncList->pfRc.pfWelsCheckSkipBasedMaxbr) {
|
||||
if ((!bSkipMustFlag)&&(pEncCtx->pSvcParam->sSpatialLayers[iDidIdx].iMaxSpatialBitrate != UNSPECIFIED_BIT_RATE)) {
|
||||
if ((!bSkipMustFlag) && (pEncCtx->pSvcParam->sSpatialLayers[iDidIdx].iMaxSpatialBitrate != UNSPECIFIED_BIT_RATE)) {
|
||||
pEncCtx->pFuncList->pfRc.pfWelsCheckSkipBasedMaxbr (pEncCtx, uiTimeStamp, iDidIdx);
|
||||
if (true == pEncCtx->pWelsSvcRc[iDidIdx].bSkipFlag) {
|
||||
bSkipMustFlag = true;
|
||||
@ -810,11 +863,11 @@ bool WelsRcCheckFrameStatus (sWelsEncCtx* pEncCtx, long long uiTimeStamp, int32_
|
||||
pEncCtx->pFuncList->pfRc.pfWelsRcPicDelayJudge (pEncCtx, uiTimeStamp, iDidIdx);
|
||||
}
|
||||
if (true == pEncCtx->pWelsSvcRc[iDidIdx].bSkipFlag) {
|
||||
bSkipMustFlag = true;
|
||||
bSkipMustFlag = true;
|
||||
}
|
||||
//check max_br skip
|
||||
if (pEncCtx->pFuncList->pfRc.pfWelsCheckSkipBasedMaxbr) {
|
||||
if ((!bSkipMustFlag)&&(pEncCtx->pSvcParam->sSpatialLayers[iDidIdx].iMaxSpatialBitrate != UNSPECIFIED_BIT_RATE)) {
|
||||
if ((!bSkipMustFlag) && (pEncCtx->pSvcParam->sSpatialLayers[iDidIdx].iMaxSpatialBitrate != UNSPECIFIED_BIT_RATE)) {
|
||||
pEncCtx->pFuncList->pfRc.pfWelsCheckSkipBasedMaxbr (pEncCtx, uiTimeStamp, iDidIdx);
|
||||
if (true == pEncCtx->pWelsSvcRc[iDidIdx].bSkipFlag) {
|
||||
bSkipMustFlag = true;
|
||||
@ -847,7 +900,7 @@ void UpdateBufferWhenFrameSkipped (sWelsEncCtx* pEncCtx, int32_t iCurDid) {
|
||||
pWelsSvcRc->iBufferMaxBRFullness[ODD_TIME_WINDOW] -= kiOutputMaxBits;
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
|
||||
"[Rc] iDid = %d,bits in buffer = %" PRId64 ", bits in Max bitrate buffer = %" PRId64,
|
||||
iCurDid,pWelsSvcRc->iBufferFullnessSkip, pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW]);
|
||||
iCurDid, pWelsSvcRc->iBufferFullnessSkip, pWelsSvcRc->iBufferMaxBRFullness[EVEN_TIME_WINDOW]);
|
||||
|
||||
pWelsSvcRc->iBufferFullnessSkip = WELS_MAX (pWelsSvcRc->iBufferFullnessSkip, 0);
|
||||
|
||||
@ -859,7 +912,7 @@ void UpdateBufferWhenFrameSkipped (sWelsEncCtx* pEncCtx, int32_t iCurDid) {
|
||||
//output a warning when iContinualSkipFrames is large enough, which may indicate subjective quality problem
|
||||
//note that here iContinualSkipFrames must be >0, so the log output will be 3/6/....
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_WARNING, "[Rc] iDid = %d,iContinualSkipFrames(%d) is large",
|
||||
iCurDid,pWelsSvcRc->iContinualSkipFrames);
|
||||
iCurDid, pWelsSvcRc->iContinualSkipFrames);
|
||||
}
|
||||
}
|
||||
void UpdateMaxBrCheckWindowStatus (sWelsEncCtx* pEncCtx, int32_t iSpatialNum, const long long uiTimeStamp) {
|
||||
@ -992,10 +1045,22 @@ void RcUpdateIntraComplexity (sWelsEncCtx* pEncCtx) {
|
||||
if (iAlpha < (INT_MULTIPLY / 4)) iAlpha = INT_MULTIPLY / 4;
|
||||
int32_t iQStep = RcConvertQp2QStep (pWelsSvcRc->iAverageFrameQp);
|
||||
int64_t iIntraCmplx = iQStep * static_cast<int64_t> (pWelsSvcRc->iFrameDqBits);
|
||||
pWelsSvcRc->iIntraComplexity = WELS_DIV_ROUND (((INT_MULTIPLY - iAlpha) * pWelsSvcRc->iIntraComplexity + iAlpha *
|
||||
iIntraCmplx), INT_MULTIPLY);
|
||||
pWelsSvcRc->iIntraMbCount = pWelsSvcRc->iNumberMbFrame;
|
||||
if (pWelsSvcRc->iIdrNum == 0) {
|
||||
pWelsSvcRc->iIntraComplexity = iIntraCmplx;
|
||||
pWelsSvcRc->iIntraComplxMean = pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity;
|
||||
} else {
|
||||
pWelsSvcRc->iIntraComplexity = WELS_DIV_ROUND (((LINEAR_MODEL_DECAY_FACTOR) * pWelsSvcRc->iIntraComplexity +
|
||||
(INT_MULTIPLY - LINEAR_MODEL_DECAY_FACTOR) *
|
||||
iIntraCmplx), INT_MULTIPLY);
|
||||
|
||||
|
||||
pWelsSvcRc->iIntraComplxMean = WELS_DIV_ROUND64 (((LINEAR_MODEL_DECAY_FACTOR) * static_cast<int64_t>
|
||||
(pWelsSvcRc->iIntraComplxMean)
|
||||
+ (INT_MULTIPLY - LINEAR_MODEL_DECAY_FACTOR) * (pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity)),
|
||||
INT_MULTIPLY);
|
||||
}
|
||||
|
||||
pWelsSvcRc->iIntraMbCount = pWelsSvcRc->iNumberMbFrame;
|
||||
pWelsSvcRc->iIdrNum++;
|
||||
if (pWelsSvcRc->iIdrNum > 255)
|
||||
pWelsSvcRc->iIdrNum = 255;
|
||||
@ -1011,20 +1076,21 @@ void RcUpdateFrameComplexity (sWelsEncCtx* pEncCtx) {
|
||||
SRCTemporal* pTOverRc = &pWelsSvcRc->pTemporalOverRc[kiTl];
|
||||
|
||||
int32_t iQStep = RcConvertQp2QStep (pWelsSvcRc->iAverageFrameQp);
|
||||
if (0 == pTOverRc->iPFrameNum) {
|
||||
pTOverRc->iLinearCmplx = ((int64_t)pWelsSvcRc->iFrameDqBits) * iQStep;
|
||||
} else {
|
||||
pTOverRc->iLinearCmplx = WELS_DIV_ROUND64 ((LINEAR_MODEL_DECAY_FACTOR * (int64_t)pTOverRc->iLinearCmplx
|
||||
+ (INT_MULTIPLY - LINEAR_MODEL_DECAY_FACTOR) * ((int64_t)pWelsSvcRc->iFrameDqBits * iQStep)),
|
||||
INT_MULTIPLY);
|
||||
}
|
||||
int32_t iAlpha = WELS_DIV_ROUND (INT_MULTIPLY, (1 + pTOverRc->iPFrameNum));
|
||||
if (iAlpha < SMOOTH_FACTOR_MIN_VALUE)
|
||||
iAlpha = SMOOTH_FACTOR_MIN_VALUE;
|
||||
pTOverRc->iFrameCmplxMean = WELS_DIV_ROUND ((LINEAR_MODEL_DECAY_FACTOR * static_cast<int64_t>
|
||||
(pTOverRc->iFrameCmplxMean)
|
||||
+ (INT_MULTIPLY - LINEAR_MODEL_DECAY_FACTOR) * pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity),
|
||||
INT_MULTIPLY);
|
||||
if (0 == pTOverRc->iPFrameNum) {
|
||||
pTOverRc->iLinearCmplx = ((int64_t)pWelsSvcRc->iFrameDqBits) * iQStep;
|
||||
pTOverRc->iFrameCmplxMean = (int32_t)pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity;
|
||||
} else {
|
||||
pTOverRc->iLinearCmplx = WELS_DIV_ROUND64 (((LINEAR_MODEL_DECAY_FACTOR) * (int64_t)pTOverRc->iLinearCmplx
|
||||
+ (INT_MULTIPLY - LINEAR_MODEL_DECAY_FACTOR) * ((int64_t)pWelsSvcRc->iFrameDqBits * iQStep)),
|
||||
INT_MULTIPLY);
|
||||
pTOverRc->iFrameCmplxMean = WELS_DIV_ROUND (((LINEAR_MODEL_DECAY_FACTOR) * static_cast<int64_t>
|
||||
(pTOverRc->iFrameCmplxMean)
|
||||
+ (INT_MULTIPLY - LINEAR_MODEL_DECAY_FACTOR) * pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity),
|
||||
INT_MULTIPLY);
|
||||
}
|
||||
|
||||
|
||||
pTOverRc->iPFrameNum++;
|
||||
@ -1066,7 +1132,19 @@ void WelsRcPictureInitGom (sWelsEncCtx* pEncCtx, long long uiTimeStamp) {
|
||||
if (pEncCtx->uiTemporalId == 0) {
|
||||
RcUpdateTemporalZero (pEncCtx);
|
||||
}
|
||||
RcDecideTargetBits (pEncCtx);
|
||||
if (pEncCtx->pSvcParam->iRCMode == RC_TIMESTAMP_MODE) {
|
||||
RcDecideTargetBitsTimestamp (pEncCtx);
|
||||
pWelsSvcRc->uiLastTimeStamp = uiTimeStamp;
|
||||
} else {
|
||||
RcDecideTargetBits (pEncCtx);
|
||||
}
|
||||
//turn off GOM QP when slicenum is larger 1
|
||||
if ((pWelsSvcRc->iSliceNum > 1) || ((pEncCtx->pSvcParam->iRCMode == RC_BITRATE_MODE)
|
||||
&& (pEncCtx->eSliceType == I_SLICE))) {
|
||||
pWelsSvcRc->bEnableGomQp = false;
|
||||
} else
|
||||
pWelsSvcRc->bEnableGomQp = true;
|
||||
|
||||
//decide globe_qp
|
||||
if (pEncCtx->eSliceType == I_SLICE) {
|
||||
if (0 == pWelsSvcRc->iIdrNum)
|
||||
@ -1079,7 +1157,6 @@ void WelsRcPictureInitGom (sWelsEncCtx* pEncCtx, long long uiTimeStamp) {
|
||||
}
|
||||
RcInitSliceInformation (pEncCtx);
|
||||
RcInitGomParameters (pEncCtx);
|
||||
|
||||
}
|
||||
|
||||
void WelsRcPictureInfoUpdateGom (sWelsEncCtx* pEncCtx, int32_t iLayerSize) {
|
||||
@ -1114,21 +1191,22 @@ void WelsRcMbInitGom (sWelsEncCtx* pEncCtx, SMB* pCurMb, SSlice* pSlice) {
|
||||
const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset;
|
||||
|
||||
pSOverRc->iBsPosSlice = pEncCtx->pFuncList->pfGetBsPosition (pSlice);
|
||||
if ((pEncCtx->pSvcParam->iRCMode == RC_BITRATE_MODE) && (pEncCtx->eSliceType == I_SLICE)) {
|
||||
if (pWelsSvcRc->bEnableGomQp) {
|
||||
//calculate gom qp and target bits at the beginning of gom
|
||||
if (0 == (pCurMb->iMbXY % pWelsSvcRc->iNumberMbGom)) {
|
||||
if (pCurMb->iMbXY != pSOverRc->iStartMbSlice) {
|
||||
pSOverRc->iComplexityIndexSlice++;
|
||||
RcCalculateGomQp (pEncCtx, pCurMb, kiSliceId);
|
||||
}
|
||||
RcGomTargetBits (pEncCtx, kiSliceId);
|
||||
}
|
||||
|
||||
RcCalculateMbQp (pEncCtx, pCurMb, kiSliceId);
|
||||
} else {
|
||||
pCurMb->uiLumaQp = pEncCtx->iGlobalQp;
|
||||
pCurMb->uiChromaQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp + kuiChromaQpIndexOffset)];
|
||||
return;
|
||||
}
|
||||
//calculate gom qp and target bits at the beginning of gom
|
||||
if (0 == (pCurMb->iMbXY % pWelsSvcRc->iNumberMbGom)) {
|
||||
if (pCurMb->iMbXY != pSOverRc->iStartMbSlice) {
|
||||
pSOverRc->iComplexityIndexSlice++;
|
||||
RcCalculateGomQp (pEncCtx, pCurMb, kiSliceId);
|
||||
}
|
||||
RcGomTargetBits (pEncCtx, kiSliceId);
|
||||
}
|
||||
|
||||
RcCalculateMbQp (pEncCtx, pCurMb, kiSliceId);
|
||||
}
|
||||
|
||||
void WelsRcMbInfoUpdateGom (sWelsEncCtx* pEncCtx, SMB* pCurMb, int32_t iCostLuma, SSlice* pSlice) {
|
||||
@ -1144,9 +1222,6 @@ void WelsRcMbInfoUpdateGom (sWelsEncCtx* pEncCtx, SMB* pCurMb, int32_t iCostLuma
|
||||
pSOverRc->iGomBitsSlice += iCurMbBits;
|
||||
|
||||
pWelsSvcRc->pGomCost[kiComplexityIndex] += iCostLuma;
|
||||
|
||||
pWelsSvcRc->iMinFrameQp = WELS_MIN (pWelsSvcRc->iMinFrameQp, pCurMb->uiLumaQp);
|
||||
pWelsSvcRc->iMaxFrameQp = WELS_MAX (pWelsSvcRc->iMaxFrameQp, pCurMb->uiLumaQp);
|
||||
if (iCurMbBits > 0) {
|
||||
pSOverRc->iTotalQpSlice += pCurMb->uiLumaQp;
|
||||
pSOverRc->iTotalMbSlice++;
|
||||
@ -1326,7 +1401,8 @@ void WelsRcFrameDelayJudgeTimeStamp (sWelsEncCtx* pEncCtx, long long uiTimeStamp
|
||||
pWelsSvcRc->iBufferSizePadding = WELS_DIV_ROUND (pDLayerConfig->iSpatialBitrate * PADDING_BUFFER_RATIO, INT_MULTIPLY);
|
||||
|
||||
pWelsSvcRc->iBufferFullnessSkip -= iSentBits;
|
||||
pWelsSvcRc->iBufferFullnessSkip = WELS_MAX (0, pWelsSvcRc->iBufferFullnessSkip);
|
||||
pWelsSvcRc->iBufferFullnessSkip = WELS_MAX ((-1) * (pDLayerConfig->iSpatialBitrate / 4),
|
||||
pWelsSvcRc->iBufferFullnessSkip);
|
||||
|
||||
if (pEncCtx->pSvcParam->bEnableFrameSkip) {
|
||||
pWelsSvcRc->bSkipFlag = true;
|
||||
@ -1340,121 +1416,10 @@ void WelsRcFrameDelayJudgeTimeStamp (sWelsEncCtx* pEncCtx, long long uiTimeStamp
|
||||
}
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
|
||||
"WelsRcFrameDelayJudgeTimeStamp iDidIdx = %d,iSkipFrameNum = %d,buffer = %" PRId64
|
||||
",threadhold = %d,bitrate = %d,iSentBits = %d,lasttimestamp = %lld,timestamp=%lld\n", iDidIdx,
|
||||
",threadhold = %d,bitrate = %d,iSentBits = %d,lasttimestamp = %lld,timestamp=%lld", iDidIdx,
|
||||
pWelsSvcRc->iSkipFrameNum, pWelsSvcRc->iBufferFullnessSkip, pWelsSvcRc->iBufferSizeSkip, iBitRate, iSentBits,
|
||||
pWelsSvcRc->uiLastTimeStamp, uiTimeStamp);
|
||||
}
|
||||
void WelsRcPictureInitGomTimeStamp (sWelsEncCtx* pEncCtx, long long uiTimeStamp) {
|
||||
SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId];
|
||||
SSpatialLayerConfig* pDLayerParam = &pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId];
|
||||
int32_t iLumaQp = pWelsSvcRc->iLastCalculatedQScale;
|
||||
int32_t iTl = pEncCtx->uiTemporalId;
|
||||
SRCTemporal* pTOverRc = &pWelsSvcRc->pTemporalOverRc[iTl];
|
||||
if (pEncCtx->eSliceType == I_SLICE) {
|
||||
if (0 == pWelsSvcRc->iIdrNum) { //iIdrNum == 0 means encoder has been initialed
|
||||
RcInitRefreshParameter (pEncCtx);
|
||||
}
|
||||
}
|
||||
if (RcJudgeBitrateFpsUpdate (pEncCtx)) {
|
||||
RcUpdateBitrateFps (pEncCtx);
|
||||
}
|
||||
if (pEncCtx->uiTemporalId == 0) {
|
||||
RcUpdateTemporalZero (pEncCtx);
|
||||
}
|
||||
//decide one frame bits allocated
|
||||
if (pEncCtx->eSliceType == I_SLICE) {
|
||||
if (0 == pWelsSvcRc->iIdrNum) { //iIdrNum == 0 means encoder has been initialed
|
||||
RcInitIdrQp (pEncCtx);
|
||||
iLumaQp = pWelsSvcRc->iInitialQp;
|
||||
pWelsSvcRc->iTargetBits = static_cast<int32_t> (((double) (pDLayerParam->iSpatialBitrate) / (double) (
|
||||
pDLayerParam->fFrameRate) *
|
||||
IDR_BITRATE_RATIO));
|
||||
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
|
||||
"[Rc] First IDR iSpatialBitrate = %d,iBufferFullnessSkip = %" PRId64 ",iTargetBits= %d,initQp = %d",
|
||||
pDLayerParam->iSpatialBitrate, pWelsSvcRc->iBufferFullnessSkip, pWelsSvcRc->iTargetBits,
|
||||
pWelsSvcRc->iInitialQp);
|
||||
|
||||
} else {
|
||||
int32_t iMaxTh = static_cast<int32_t> (pWelsSvcRc->iBufferSizeSkip - pWelsSvcRc->iBufferFullnessSkip);
|
||||
int32_t iMinTh = iMaxTh / 2;
|
||||
pWelsSvcRc->iTargetBits = static_cast<int32_t> (((double) (pDLayerParam->iSpatialBitrate) / (double) (
|
||||
pDLayerParam->fFrameRate) *
|
||||
IDR_BITRATE_RATIO));
|
||||
if (iMaxTh > 0) {
|
||||
pWelsSvcRc->iTargetBits = WELS_CLIP3 (pWelsSvcRc->iTargetBits, iMinTh, iMaxTh);
|
||||
|
||||
pWelsSvcRc->iQStep = WELS_DIV_ROUND (pWelsSvcRc->iIntraComplexity, pWelsSvcRc->iTargetBits);
|
||||
iLumaQp = RcConvertQStep2Qp (pWelsSvcRc->iQStep);
|
||||
|
||||
iLumaQp = WELS_CLIP3 (iLumaQp, pWelsSvcRc->iLastCalculatedQScale - DELTA_QP_BGD_THD,
|
||||
pWelsSvcRc->iLastCalculatedQScale + DELTA_QP_BGD_THD);
|
||||
|
||||
} else {
|
||||
iLumaQp = pEncCtx->iGlobalQp + DELTA_QP_BGD_THD;
|
||||
}
|
||||
iLumaQp = WELS_CLIP3 (iLumaQp, pTOverRc->iMinQp, pTOverRc->iMaxQp);
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
|
||||
"[Rc]I iLumaQp = %d,iQStep = %d,iTargetBits = %d,iBufferFullnessSkip =%" PRId64
|
||||
",iMaxTh=%d,iMinTh = %d,iFrameComplexity= %" PRId64,
|
||||
iLumaQp, pWelsSvcRc->iQStep, pWelsSvcRc->iTargetBits, pWelsSvcRc->iBufferFullnessSkip, iMaxTh, iMinTh,
|
||||
pWelsSvcRc->iIntraComplexity);
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
int32_t iMaxTh = static_cast<int32_t> (pWelsSvcRc->iBufferSizeSkip - pWelsSvcRc->iBufferFullnessSkip);
|
||||
int32_t iMinTh = iMaxTh / (iTl + 2);
|
||||
|
||||
SSpatialLayerInternal* pDLayerParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId];
|
||||
const int32_t kiGopSize = (1 << pDLayerParamInternal->iDecompositionStages);
|
||||
int32_t iAverageFrameSize = (int32_t) ((double) (pDLayerParam->iSpatialBitrate) / (double) (pDLayerParam->fFrameRate));
|
||||
const int32_t kiGopBits = iAverageFrameSize * kiGopSize;
|
||||
int64_t iCmplxRatio = WELS_DIV_ROUND64 (pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity * INT_MULTIPLY,
|
||||
pTOverRc->iFrameCmplxMean);
|
||||
iCmplxRatio = WELS_CLIP3 (iCmplxRatio, INT_MULTIPLY - FRAME_CMPLX_RATIO_RANGE, INT_MULTIPLY + FRAME_CMPLX_RATIO_RANGE);
|
||||
|
||||
pWelsSvcRc->iTargetBits = WELS_DIV_ROUND (pTOverRc->iTlayerWeight * kiGopBits, INT_MULTIPLY * 10 * 2);
|
||||
if (iMaxTh > 0) {
|
||||
pWelsSvcRc->iTargetBits = WELS_CLIP3 (pWelsSvcRc->iTargetBits, iMinTh, iMaxTh);
|
||||
if (0 == pTOverRc->iPFrameNum)
|
||||
iLumaQp = pWelsSvcRc->iInitialQp + DELTA_QP_BGD_THD;
|
||||
else {
|
||||
|
||||
pWelsSvcRc->iQStep = WELS_DIV_ROUND ((pTOverRc->iLinearCmplx * iCmplxRatio), (pWelsSvcRc->iTargetBits * INT_MULTIPLY));
|
||||
iLumaQp = RcConvertQStep2Qp (pWelsSvcRc->iQStep);
|
||||
iLumaQp = WELS_CLIP3 (iLumaQp, pWelsSvcRc->iLastCalculatedQScale - DELTA_QP_BGD_THD,
|
||||
pWelsSvcRc->iLastCalculatedQScale + DELTA_QP_BGD_THD);
|
||||
}
|
||||
} else {
|
||||
iLumaQp = pEncCtx->iGlobalQp + DELTA_QP_BGD_THD;
|
||||
}
|
||||
|
||||
iLumaQp = WELS_CLIP3 (iLumaQp, pTOverRc->iMinQp, pTOverRc->iMaxQp);
|
||||
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
|
||||
"[Rc]P iTl = %d,iLumaQp = %d,iQStep = %d,iTargetBits = %d,iBufferFullnessSkip =%" PRId64
|
||||
",iMaxTh=%d,iMinTh = %d,iFrameComplexity= %lld,iCmplxRatio=%" PRId64,
|
||||
iTl, iLumaQp, pWelsSvcRc->iQStep, pWelsSvcRc->iTargetBits, pWelsSvcRc->iBufferFullnessSkip, iMaxTh, iMinTh,
|
||||
pEncCtx->pVaa->sComplexityAnalysisParam.iFrameComplexity, iCmplxRatio);
|
||||
}
|
||||
|
||||
pWelsSvcRc->iQStep = RcConvertQp2QStep (iLumaQp);
|
||||
pWelsSvcRc->iLastCalculatedQScale = iLumaQp;
|
||||
pEncCtx->iGlobalQp = iLumaQp;
|
||||
|
||||
RcInitSliceInformation (pEncCtx);
|
||||
RcInitGomParameters (pEncCtx);
|
||||
float fInstantFps = (uiTimeStamp - pWelsSvcRc->uiLastTimeStamp) > 0 ? (1000.0f / (uiTimeStamp -
|
||||
pWelsSvcRc->uiLastTimeStamp)) : 0;
|
||||
WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_DEBUG,
|
||||
"[Rc]Tid = %d,Did = %d,pEncCtx->iGlobalQp= %d,iLumaQp = %d,uiTimeStamp = %lld,uiLastTimeStamp = %lld,InstantFps = %f,settingFps = %f",
|
||||
pEncCtx->uiTemporalId, pEncCtx->uiDependencyId,
|
||||
pEncCtx->iGlobalQp, iLumaQp, uiTimeStamp, pWelsSvcRc->uiLastTimeStamp,
|
||||
fInstantFps, pDLayerParam->fFrameRate);
|
||||
pWelsSvcRc->uiLastTimeStamp = uiTimeStamp;
|
||||
|
||||
}
|
||||
|
||||
void WelsRcPictureInfoUpdateGomTimeStamp (sWelsEncCtx* pEncCtx, int32_t iLayerSize) {
|
||||
SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId];
|
||||
@ -1531,7 +1496,7 @@ void WelsRcInitFuncPointers (sWelsEncCtx* pEncCtx, RC_MODES iRcMode) {
|
||||
pRcf->pfWelsRcMbInfoUpdate = WelsRcMbInfoUpdateDisable;
|
||||
|
||||
} else {
|
||||
pRcf->pfWelsRcPictureInit = WelsRcPictureInitGomTimeStamp;
|
||||
pRcf->pfWelsRcPictureInit = WelsRcPictureInitGom;
|
||||
pRcf->pfWelsRcPictureInfoUpdate = WelsRcPictureInfoUpdateGomTimeStamp;
|
||||
pRcf->pfWelsRcMbInit = WelsRcMbInitGom;
|
||||
pRcf->pfWelsRcMbInfoUpdate = WelsRcMbInfoUpdateGom;
|
||||
|
@ -370,6 +370,7 @@ WELS_EXTERN WelsDequantIHadamard4x4_sse2
|
||||
ret
|
||||
|
||||
|
||||
%ifdef HAVE_AVX2
|
||||
; data=%1 abs_out=%2 ff=%3 mf=%4 7FFFh=%5
|
||||
%macro AVX2_Quant 5
|
||||
vpabsw %2, %1
|
||||
@ -502,3 +503,5 @@ WELS_EXTERN WelsQuantFour4x4Max_avx2
|
||||
POP_XMM
|
||||
LOAD_4_PARA_POP
|
||||
ret
|
||||
%endif
|
||||
|
||||
|
@ -278,7 +278,7 @@ inline void CBackgroundDetection::ForegroundDilation (SBackgroundOU* pBackground
|
||||
|
||||
// chroma component check
|
||||
if (pBackgroundOU->iBackgroundFlag == 1) {
|
||||
int8_t iNeighbourForegroundFlags = !pOUNeighbours[0]->iBackgroundFlag | ((!pOUNeighbours[1]->iBackgroundFlag) << 1)
|
||||
int8_t iNeighbourForegroundFlags = (!pOUNeighbours[0]->iBackgroundFlag) | ((!pOUNeighbours[1]->iBackgroundFlag) << 1)
|
||||
| ((!pOUNeighbours[2]->iBackgroundFlag) << 2) | ((!pOUNeighbours[3]->iBackgroundFlag) << 3);
|
||||
pBackgroundOU->iBackgroundFlag = !ForegroundDilation23Chroma (iNeighbourForegroundFlags, iChromaSampleStartPos,
|
||||
iPicStrideUV, pBgdParam);
|
||||
|
@ -45,6 +45,7 @@ CDownsampling::CDownsampling (int32_t iCpuFlag) {
|
||||
m_eMethod = METHOD_DOWNSAMPLE;
|
||||
WelsMemset (&m_pfDownsample, 0, sizeof (m_pfDownsample));
|
||||
InitDownsampleFuncs (m_pfDownsample, m_iCPUFlag);
|
||||
WelsMemset(m_pSampleBuffer,0,sizeof(m_pSampleBuffer));
|
||||
m_bNoSampleBuffer = AllocateSampleBuffer();
|
||||
}
|
||||
|
||||
@ -106,10 +107,12 @@ void CDownsampling::InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc, int
|
||||
sDownsampleFunc.pfQuarterDownsampler = DyadicBilinearQuarterDownsampler_sse4;
|
||||
sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_sse41;
|
||||
}
|
||||
#ifdef HAVE_AVX2
|
||||
if (iCpuFlag & WELS_CPU_AVX2) {
|
||||
sDownsampleFunc.pfGeneralRatioChroma = GeneralBilinearAccurateDownsamplerWrap_avx2;
|
||||
sDownsampleFunc.pfGeneralRatioLuma = GeneralBilinearFastDownsamplerWrap_avx2;
|
||||
}
|
||||
#endif
|
||||
#endif//X86_ASM
|
||||
|
||||
#if defined(HAVE_NEON)
|
||||
|
@ -99,8 +99,10 @@ GeneralDownsampleFunc GeneralBilinearFastDownsamplerWrap_sse2;
|
||||
GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_sse2;
|
||||
GeneralDownsampleFunc GeneralBilinearFastDownsamplerWrap_ssse3;
|
||||
GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_sse41;
|
||||
#ifdef HAVE_AVX2
|
||||
GeneralDownsampleFunc GeneralBilinearFastDownsamplerWrap_avx2;
|
||||
GeneralDownsampleFunc GeneralBilinearAccurateDownsamplerWrap_avx2;
|
||||
#endif
|
||||
|
||||
SpecificDownsampleFunc DyadicBilinearOneThirdDownsampler_ssse3;
|
||||
SpecificDownsampleFunc DyadicBilinearOneThirdDownsampler_sse4;
|
||||
@ -120,12 +122,14 @@ void GeneralBilinearFastDownsampler_ssse3 (uint8_t* pDst, int32_t iDstStride, in
|
||||
void GeneralBilinearAccurateDownsampler_sse41 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth,
|
||||
int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX,
|
||||
uint32_t uiScaleY);
|
||||
#ifdef HAVE_AVX2
|
||||
void GeneralBilinearFastDownsampler_avx2 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth,
|
||||
int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX,
|
||||
uint32_t uiScaleY);
|
||||
void GeneralBilinearAccurateDownsampler_avx2 (uint8_t* pDst, int32_t iDstStride, int32_t iDstWidth,
|
||||
int32_t iDstHeight, uint8_t* pSrc, int32_t iSrcStride, uint32_t uiScaleX,
|
||||
uint32_t uiScaleY);
|
||||
#endif
|
||||
|
||||
WELSVP_EXTERN_C_END
|
||||
#endif
|
||||
|
@ -284,8 +284,10 @@ DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (sse2)
|
||||
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse2)
|
||||
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (ssse3)
|
||||
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (sse41)
|
||||
#ifdef HAVE_AVX2
|
||||
DEFINE_GENERAL_BILINEAR_FAST_DOWNSAMPLER_WRAP (avx2)
|
||||
DEFINE_GENERAL_BILINEAR_ACCURATE_DOWNSAMPLER_WRAP (avx2)
|
||||
#endif
|
||||
#endif //X86_ASM
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
|
@ -3254,6 +3254,7 @@ WELS_EXTERN GeneralBilinearAccurateDownsampler_sse41
|
||||
%undef xmm_xfrac1_begin
|
||||
%undef xmm_xfrac_inc
|
||||
|
||||
%ifdef HAVE_AVX2
|
||||
; xpos_int=%1 xpos_frac=%2 inc_int+1=%3 inc_frac=%4 tmp=%5
|
||||
%macro AVX2_BilinearIncXposuw 5
|
||||
vpaddusw %5, %2, %4
|
||||
@ -4552,3 +4553,5 @@ WELS_EXTERN GeneralBilinearAccurateDownsampler_avx2
|
||||
%undef ymm_xfrac0_begin
|
||||
%undef ymm_xfrac1_begin
|
||||
%undef ymm_xfrac_inc
|
||||
%endif
|
||||
|
||||
|
@ -2088,6 +2088,7 @@ sqdiff_bgd_width_loop:
|
||||
%assign push_num push_num - stack_alloc_num
|
||||
%endmacro
|
||||
|
||||
%ifdef HAVE_AVX2
|
||||
; Max unsigned byte per quadword
|
||||
; out=%1 in=%2 tmp=%3
|
||||
%macro AVX2_Maxubq 3
|
||||
@ -3557,3 +3558,6 @@ WELS_EXTERN VAACalcSadSsdBgd_avx2
|
||||
%undef p_sd8x8
|
||||
%undef p_mad8x8
|
||||
ret
|
||||
|
||||
%endif
|
||||
|
||||
|
@ -130,8 +130,8 @@ TEST_P (DecodeEncodeTest, CompareOutput) {
|
||||
}
|
||||
}
|
||||
static const DecodeEncodeFileParam kFileParamArray[] = {
|
||||
{"res/test_vd_1d.264", "cb3ec7a1bf37d0c08118f00009befb6f11dace3c", 320, 192, 12.0f},
|
||||
{"res/test_vd_rc.264", "f231547ee7a3e6e4f1a05425280c41f285df390c", 320, 192, 12.0f},
|
||||
{"res/test_vd_1d.264", "18929a1618c3c67e9808f23bc483816d408e2caa", 320, 192, 12.0f},
|
||||
{"res/test_vd_rc.264", "2fe1c4e03e03c9377486b05bac032989e754cd0e", 320, 192, 12.0f},
|
||||
};
|
||||
|
||||
|
||||
|
@ -103,54 +103,54 @@ TEST_P (EncoderOutputTest, CompareOutput) {
|
||||
static const EncodeFileParam kFileParamArray[] = {
|
||||
{
|
||||
"res/CiscoVT2people_320x192_12fps.yuv",
|
||||
{"ee97bab09041362fbe8aed16f69a55f16b106f92"}, CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
{"6426c5d4d8d65ddd4c540f41c629a25450a2b7e0"}, CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
},
|
||||
{
|
||||
"res/CiscoVT2people_160x96_6fps.yuv",
|
||||
{"37b4f5b7b77b362dee9a74ac4b2bc043537f2dd0"}, CAMERA_VIDEO_REAL_TIME, 160, 96, 6.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
{"24c9968cda12fdc522fd711ceb80d82b8e3df78e"}, CAMERA_VIDEO_REAL_TIME, 160, 96, 6.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
},
|
||||
{
|
||||
"res/Static_152_100.yuv",
|
||||
{"b220be0163974e36fbd6662236d4e05566a21546"}, CAMERA_VIDEO_REAL_TIME, 152, 100, 6.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
{"60630b2b10f0d339ae94464777a64a068401bdd5"}, CAMERA_VIDEO_REAL_TIME, 152, 100, 6.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
},
|
||||
{
|
||||
"res/CiscoVT2people_320x192_12fps.yuv",
|
||||
{"21dbfaaf4f09af735298434c1f97cf95a464165b"}, CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_RASTER_SLICE, false, 1, false, false, false // One slice per MB row
|
||||
{"27ad06d87e71bc5aa19877338e6d00774d5ec307"}, CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_RASTER_SLICE, false, 1, false, false, false // One slice per MB row
|
||||
},
|
||||
{
|
||||
"res/CiscoVT2people_320x192_12fps.yuv",
|
||||
{"409ced068a5a313cad7c654d27ab7a2edaed4630"}, CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, true, 1, false, false, false
|
||||
{"2cd791e7a176618d83b35fc014d6ac2eebb630d2"}, CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, true, 1, false, false, false
|
||||
},
|
||||
{
|
||||
"res/CiscoVT2people_320x192_12fps.yuv",
|
||||
// Allow for different output depending on whether averaging is done
|
||||
// vertically or horizontally first when downsampling.
|
||||
{ "a5341d588b769809c1f1d983e5a0fcef7362f3ad", "73156dfc1dc45924349b5b79f8debcac13d7231d" },
|
||||
{ "bf8be81e370bec0866e3a7851ef853efec2698ee", "a70ed5995fcb021ee2a1e2f731894a26b041a868" },
|
||||
CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 2, false, false, false
|
||||
},
|
||||
{
|
||||
"res/Cisco_Absolute_Power_1280x720_30fps.yuv",
|
||||
{"8bc8813ee262b356e53eec6dbdc3c88d7d2b7b5c"}, CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, false, false, false
|
||||
{"102b4ad80578d3fad3f8ab72f4903670f23065be"}, CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, false, false, false
|
||||
},
|
||||
{
|
||||
"res/Cisco_Absolute_Power_1280x720_30fps.yuv",
|
||||
// Allow for different output depending on whether averaging is done
|
||||
// vertically or horizontally first when downsampling.
|
||||
{ "ec9d776a7d92cf0f6640065aee8af2450af0e993", "3943145545a2bd27a642b2045d4e3dbae55c6870" },
|
||||
{ "2f4bc88c79771b6e197e215ab6a9d3064f02c2aa", "5af0d6e97f039b5252cf2433c54ec68f354a4115" },
|
||||
CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_SINGLE_SLICE, false, 4, false, false, false
|
||||
},
|
||||
// the following values may be adjusted for times since we start tuning the strategy
|
||||
{
|
||||
"res/CiscoVT2people_320x192_12fps.yuv",
|
||||
{"c384a0dafc46573d02a38d8323304c5e1309d9d0"}, SCREEN_CONTENT_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
{"a075c04a33bc4e326ae4cc60a8f45938dbfa8b24"}, SCREEN_CONTENT_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
},
|
||||
{
|
||||
"res/CiscoVT2people_160x96_6fps.yuv",
|
||||
{"637d50652f9bb8750359c4b418f30a039908d56d"}, SCREEN_CONTENT_REAL_TIME, 160, 96, 6.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
{"7b7fa895ea1099402ce4c0c4edc92e383e7badce"}, SCREEN_CONTENT_REAL_TIME, 160, 96, 6.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
},
|
||||
{
|
||||
"res/Static_152_100.yuv",
|
||||
{"77a140c6bd80a6479ee13aecd2a8dac0a17cf03d"}, SCREEN_CONTENT_REAL_TIME, 152, 100, 6.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
{"04ab90de551f81d0900b7c661925f6922e36befe"}, SCREEN_CONTENT_REAL_TIME, 152, 100, 6.0f, SM_SINGLE_SLICE, false, 1, false, false, false
|
||||
},
|
||||
{
|
||||
"res/Cisco_Absolute_Power_1280x720_30fps.yuv",
|
||||
@ -163,15 +163,15 @@ static const EncodeFileParam kFileParamArray[] = {
|
||||
},
|
||||
{
|
||||
"res/CiscoVT2people_320x192_12fps.yuv",
|
||||
{"04ad01bb3872f7dae055c1ec661218f41a020dac"}, CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 1, false, false, true //turn on cabac
|
||||
{"1103db3a06f10d3456919f41a5e1552d42c3b172"}, CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 1, false, false, true //turn on cabac
|
||||
},
|
||||
{
|
||||
"res/Cisco_Absolute_Power_1280x720_30fps.yuv",
|
||||
{"f5a92b7a0c00691e04bf1306c8251a74f989e4e2"}, CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, false, false, true
|
||||
{"18d9a8b390c705e804e184caf95a802e24c9f0b9"}, CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, false, false, true
|
||||
},
|
||||
{
|
||||
"res/Cisco_Absolute_Power_1280x720_30fps.yuv",
|
||||
{"a27539982433279faa9975c96eaec28df770223e"}, CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_FIXEDSLCNUM_SLICE, false, 1, false, false, true
|
||||
{"904da3557751bab4f0e2f0acfe12963b03acc108"}, CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_FIXEDSLCNUM_SLICE, false, 1, false, false, true
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -53,6 +53,7 @@ void SetNonZeroCount_ref (int8_t* pNonZeroCount) {
|
||||
}
|
||||
|
||||
#if defined(X86_ASM)
|
||||
#if defined(HAVE_AVX2)
|
||||
void IdctFourResAddPred_ref (uint8_t* pPred, int32_t iStride, int16_t* pRs) {
|
||||
IdctResAddPred_ref (pPred + 0 * iStride + 0, iStride, pRs + 0 * 16);
|
||||
IdctResAddPred_ref (pPred + 0 * iStride + 4, iStride, pRs + 1 * 16);
|
||||
@ -60,6 +61,7 @@ void IdctFourResAddPred_ref (uint8_t* pPred, int32_t iStride, int16_t* pRs) {
|
||||
IdctResAddPred_ref (pPred + 4 * iStride + 4, iStride, pRs + 3 * 16);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
} // anon ns
|
||||
|
||||
@ -138,9 +140,11 @@ GENERATE_IDCTRESADDPRED (IdctResAddPred_c, 0)
|
||||
#if defined(X86_ASM)
|
||||
GENERATE_IDCTRESADDPRED (IdctResAddPred_mmx, WELS_CPU_MMXEXT)
|
||||
GENERATE_IDCTRESADDPRED (IdctResAddPred_sse2, WELS_CPU_SSE2)
|
||||
#if defined(HAVE_AVX2)
|
||||
GENERATE_IDCTRESADDPRED (IdctResAddPred_avx2, WELS_CPU_AVX2)
|
||||
GENERATE_IDCTFOURRESADDPRED (IdctFourResAddPred_avx2, WELS_CPU_AVX2)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_NEON)
|
||||
GENERATE_IDCTRESADDPRED (IdctResAddPred_neon, WELS_CPU_NEON)
|
||||
|
@ -266,7 +266,7 @@ void AnchorPredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
|
||||
|
||||
int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
|
||||
int32_t iLeftTopType, iRightTopType, iTopType, iLeftType;
|
||||
int32_t iCurX, iCurY, iCurXy, iLeftXy, iTopXy, iLeftTopXy, iRightTopXy = 0;
|
||||
int32_t iCurX, iCurY, iCurXy, iLeftXy, iTopXy = 0, iLeftTopXy = 0, iRightTopXy = 0;
|
||||
|
||||
int8_t iLeftRef;
|
||||
int8_t iTopRef;
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -372,12 +372,15 @@ GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearFastDownsamplerWrap_ssse3
|
||||
WELS_CPU_SSSE3)
|
||||
GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsamplerWrap_sse41,
|
||||
GeneralBilinearAccurateDownsampler_ref, 1, WELS_CPU_SSE41)
|
||||
#ifdef HAVE_AVX2
|
||||
GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearFastDownsamplerWrap_avx2, GeneralBilinearFastDownsampler_ref, 1,
|
||||
WELS_CPU_AVX2)
|
||||
GENERATE_GeneralBilinearDownsampler_UT (GeneralBilinearAccurateDownsamplerWrap_avx2,
|
||||
GeneralBilinearAccurateDownsampler_ref, 1, WELS_CPU_AVX2)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_NEON)
|
||||
GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsamplerWidthx32_neon, 1, WELS_CPU_NEON)
|
||||
GENERATE_DyadicBilinearDownsampler_UT (DyadicBilinearDownsampler_neon, 1, WELS_CPU_NEON)
|
||||
|
Loading…
x
Reference in New Issue
Block a user