add sub8x8 mode decision functions

This commit is contained in:
huili2 2015-06-08 11:06:32 +08:00
parent 82a0d3e4a2
commit 57bd721b2f
9 changed files with 568 additions and 86 deletions

View File

@ -1292,8 +1292,10 @@ void InitMcFunc (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
#if defined (X86_ASM)
if (uiCpuFlag & WELS_CPU_SSE2) {
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
#if 1 //could not work well for sub8x8: should disable it for now, or bugfix for it!
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2;
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2;
#endif
pMcFuncs->pfSampleAveraging = PixelAvg_sse2;
pMcFuncs->pMcChromaFunc = McChroma_sse2;
pMcFuncs->pMcLumaFunc = McLuma_sse2;

View File

@ -83,6 +83,30 @@ void update_P8x16_motion_info (SMbCache* pMbCache, SMB* pCurMb, const int32_t ki
void UpdateP8x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
SMVUnitXY* pMv);
/*!
 * \brief   update pMv and uiRefIndex cache for current MB and pMbCache, only for P_4x4
 *          (the MV is written to both pCurMb and the cache; the ref index to the cache only)
 * \param   pMbCache    MB cache whose sMvComponents ref-index and MV caches are written
 * \param   pCurMb      current MB; its sMv array receives the MV
 * \param   kiPartIdx   block index used to look up g_kuiMbCountScan4Idx / g_kuiCache30ScanIdx
 * \param   kiRef       reference index stored into the ref-index cache
 * \param   pMv         motion vector to store (only read)
 */
void UpdateP4x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
SMVUnitXY* pMv);
/*!
 * \brief   update pMv and uiRefIndex cache for current MB and pMbCache, only for P_8x4
 *          (two consecutive slots are written in pCurMb->sMv and in each cache array)
 * \param   pMbCache    MB cache whose sMvComponents ref-index and MV caches are written
 * \param   pCurMb      current MB; its sMv array receives the MV
 * \param   kiPartIdx   index of the first covered block, looked up in g_kuiMbCountScan4Idx / g_kuiCache30ScanIdx
 * \param   kiRef       reference index stored into the ref-index cache
 * \param   pMv         motion vector to store (only read)
 */
void UpdateP8x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
SMVUnitXY* pMv);
/*!
 * \brief   update pMv and uiRefIndex cache for current MB and pMbCache, only for P_4x8
 *          (the second slot written is 4 entries further in pCurMb->sMv and 6 entries further in the caches)
 * \param   pMbCache    MB cache whose sMvComponents ref-index and MV caches are written
 * \param   pCurMb      current MB; its sMv array receives the MV
 * \param   kiPartIdx   index of the first covered block, looked up in g_kuiMbCountScan4Idx / g_kuiCache30ScanIdx
 * \param   kiRef       reference index stored into the ref-index cache
 * \param   pMv         motion vector to store (only read)
 */
void UpdateP4x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
SMVUnitXY* pMv);
/*!
* \brief get the motion predictor for 4*4 or 8*8 or 16*16 block
* \param
@ -135,11 +159,33 @@ void UpdateP16x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef,
* \param
*/
void UpdateP8x16Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv);
/*!
* \brief only update pMv cache for current MB, only for P_8x8
* \param
* \param
*/
void UpdateP8x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv);
/*!
 * \brief   update the ref-index and pMv caches in pMbCache only (no SMB is touched), only for P_4x4
 * \param   pMbCache    MB cache whose sMvComponents ref-index and MV caches are written
 * \param   iPartIdx    block index used to look up g_kuiCache30ScanIdx
 * \param   iRef        reference index stored into the ref-index cache
 * \param   pMv         motion vector to store (only read)
 */
void UpdateP4x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv);
/*!
 * \brief   update the ref-index and pMv caches in pMbCache only (no SMB is touched), only for P_8x4
 *          (two consecutive cache slots are written)
 * \param   pMbCache    MB cache whose sMvComponents ref-index and MV caches are written
 * \param   iPartIdx    index of the first covered block, looked up in g_kuiCache30ScanIdx
 * \param   iRef        reference index stored into the ref-index cache
 * \param   pMv         motion vector to store (only read)
 */
void UpdateP8x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv);
/*!
 * \brief   update the ref-index and pMv caches in pMbCache only (no SMB is touched), only for P_4x8
 *          (the second cache slot written is 6 entries after the first)
 * \param   pMbCache    MB cache whose sMvComponents ref-index and MV caches are written
 * \param   iPartIdx    index of the first covered block, looked up in g_kuiCache30ScanIdx
 * \param   iRef        reference index stored into the ref-index cache
 * \param   pMv         motion vector to store (only read)
 */
void UpdateP4x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t iRef, SMVUnitXY* pMv);
}
#endif//WELS_MV_PRED_H__

View File

@ -63,6 +63,9 @@ int32_t WelsMdP16x16 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* p
int32_t WelsMdP16x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice);
int32_t WelsMdP8x16 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice);
int32_t WelsMdP8x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice);
// Sub-8x8 mode decision helpers: each runs motion search over the 4x4 / 8x4 / 4x8 partitions
// of the 8x8 block selected by ki8x8Idx, stores the resulting MVs into the slice's MB cache,
// and returns the sum of the partitions' uiSatdCost.
int32_t WelsMdP4x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, const int32_t ki8x8Idx);
int32_t WelsMdP8x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, const int32_t ki8x8Idx);
int32_t WelsMdP4x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice, const int32_t ki8x8Idx);
/*static*/ void WelsMdInterInit (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb, const int32_t kiSliceFirstMbXY);
/*static*/ void WelsMdInterFinePartition (sWelsEncCtx* pEnc, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, int32_t bestCost);
/*static*/ void WelsMdInterFinePartitionVaa (sWelsEncCtx* pEnc, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, int32_t bestCost);

View File

@ -50,6 +50,7 @@ typedef struct TagMB {
/*************************mb_layer() syntax and generated********************************/
/*mb_layer():*/
Mb_Type uiMbType; // including MB detailed partition type, number and type of reference list
Mb_Type uiSubMbType[4]; // sub MB types
int32_t iMbXY; // offset position of MB top left point based
int16_t iMbX; // position of MB in horizontal axis [0..32767]
int16_t iMbY; // position of MB in vertical axis [0..32767]
@ -71,7 +72,7 @@ uint8_t uiChromaQp;
uint16_t uiSliceIdc; // 2^16=65536 > MaxFS(36864) of level 5.1; AVC: pFirstMbInSlice?; SVC: (pFirstMbInSlice << 7) | ((uiDependencyId << 4) | uiQualityId);
uint32_t uiChromPredMode;
int32_t iLumaDQp;
SMVUnitXY sMvd[4];
SMVUnitXY sMvd[MB_BLOCK4x4_NUM]; //only for CABAC writing; storage structure the same as sMv, in 4x4 scan order.
int32_t iCbpDc;
//uint8_t reserved_filling_bytes[1]; // not deleting this line for further changes of this structure. filling bytes reserved to make structure aligned with 4 bytes, higher cache hit on less structure size by 2 cache lines( 2 * 64 bytes) once hit
} SMB, *PMb;

View File

@ -301,7 +301,51 @@ void UpdateP8x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPart
pMvComp->sMotionVectorCache[kiCacheIdx6] =
pMvComp->sMotionVectorCache[kiCacheIdx7] = *pMv;
}
// P4x4 partition: store the MV into the MB record and into the MV cache, and the
// reference index into the ref-index cache (pCurMb's own ref index is not written here).
//   kiPartIdx - block index, mapped through the two scan tables to the MB slot / cache slot
void UpdateP4x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
                           SMVUnitXY* pMv) {
  const int16_t kiMbPos    = g_kuiMbCountScan4Idx[kiPartIdx]; // slot in pCurMb->sMv
  const int16_t kiCachePos = g_kuiCache30ScanIdx[kiPartIdx];  // slot in both cache arrays
  SMVComponentUnit* pCache = &pMbCache->sMvComponents;

  pCurMb->sMv[kiMbPos]                   = *pMv;
  pCache->sMotionVectorCache[kiCachePos] = *pMv;
  pCache->iRefIndexCache[kiCachePos]     = kiRef;
}
// P8x4 partition: the MV is stored into two consecutive slots of the MB record and of the
// MV cache; the reference index into the same two slots of the ref-index cache
// (pCurMb's own ref index is not written here).
void UpdateP8x4MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
                           SMVUnitXY* pMv) {
  const int16_t kiMbPos    = g_kuiMbCountScan4Idx[kiPartIdx]; // first slot in pCurMb->sMv
  const int16_t kiCachePos = g_kuiCache30ScanIdx[kiPartIdx];  // first slot in the cache arrays
  SMVComponentUnit* pCache = &pMbCache->sMvComponents;

  // offsets 0 and 1: the slot itself and its immediate successor
  for (int32_t iOff = 0; iOff < 2; ++iOff) {
    pCurMb->sMv[iOff + kiMbPos]                   = *pMv;
    pCache->iRefIndexCache[iOff + kiCachePos]     = kiRef;
    pCache->sMotionVectorCache[iOff + kiCachePos] = *pMv;
  }
}
// P4x8 partition: the MV is stored into the MB slot and the slot 4 entries later, and into
// the cache slot and the one 6 entries later (presumably the block directly below in each
// layout - confirm against the table definitions); the reference index goes into the same
// two ref-index cache slots. pCurMb's own ref index is not written here.
void UpdateP4x8MotionInfo (SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef,
                           SMVUnitXY* pMv) {
  const int16_t kiMbPos    = g_kuiMbCountScan4Idx[kiPartIdx]; // first slot in pCurMb->sMv
  const int16_t kiCachePos = g_kuiCache30ScanIdx[kiPartIdx];  // first slot in the cache arrays
  SMVComponentUnit* pCache = &pMbCache->sMvComponents;

  for (int32_t iRow = 0; iRow < 2; ++iRow) {
    const int32_t kiMbOff    = iRow * 4; // 0, then 4
    const int32_t kiCacheOff = iRow * 6; // 0, then 6
    pCurMb->sMv[kiMbOff + kiMbPos]                   = *pMv;
    pCache->iRefIndexCache[kiCacheOff + kiCachePos]     = kiRef;
    pCache->sMotionVectorCache[kiCacheOff + kiCachePos] = *pMv;
  }
}
//=========================update motion info(MV and ref_idx) into Mb_cache==========================
//update pMv and uiRefIndex cache only for Mb_cache, only for P_16*16 (SKIP inclusive)
@ -359,4 +403,34 @@ void UpdateP8x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t pRef,
pMvComp->sMotionVectorCache[7 + kuiCacheIdx] = *pMv;
}
// P4x4, cache only: store the reference index and MV into the single cache slot that
// g_kuiCache30ScanIdx assigns to iPartIdx. No SMB is modified.
void UpdateP4x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t pRef, SMVUnitXY* pMv) {
  const uint8_t kuiSlot    = g_kuiCache30ScanIdx[iPartIdx];
  SMVComponentUnit* pCache = &pMbCache->sMvComponents;

  pCache->sMotionVectorCache[kuiSlot] = *pMv;
  pCache->iRefIndexCache[kuiSlot]     = pRef;
}
// P8x4, cache only: store the reference index and MV into the cache slot assigned to
// iPartIdx and into the slot immediately after it. No SMB is modified.
void UpdateP8x4Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t pRef, SMVUnitXY* pMv) {
  const uint8_t kuiSlot    = g_kuiCache30ScanIdx[iPartIdx];
  SMVComponentUnit* pCache = &pMbCache->sMvComponents;

  // second slot first, mirroring the right-to-left evaluation of a chained assignment
  pCache->iRefIndexCache[1 + kuiSlot]     = pRef;
  pCache->iRefIndexCache[kuiSlot]         = pRef;
  pCache->sMotionVectorCache[1 + kuiSlot] = *pMv;
  pCache->sMotionVectorCache[kuiSlot]     = *pMv;
}
// P4x8, cache only: store the reference index and MV into the cache slot assigned to
// iPartIdx and into the slot 6 entries after it. No SMB is modified.
void UpdateP4x8Motion2Cache (SMbCache* pMbCache, int32_t iPartIdx, int8_t pRef, SMVUnitXY* pMv) {
  const uint8_t kuiSlot    = g_kuiCache30ScanIdx[iPartIdx];
  SMVComponentUnit* pCache = &pMbCache->sMvComponents;

  // second slot first, mirroring the right-to-left evaluation of a chained assignment
  pCache->iRefIndexCache[6 + kuiSlot]     = pRef;
  pCache->iRefIndexCache[kuiSlot]         = pRef;
  pCache->sMotionVectorCache[6 + kuiSlot] = *pMv;
  pCache->sMotionVectorCache[kuiSlot]     = *pMv;
}
} // namespace WelsEnc

View File

@ -1117,6 +1117,124 @@ int32_t WelsMdP8x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWe
return iCostP8x8;
}
// Motion-search the four 4x4 partitions of 8x8 block ki8x8Idx.
// For each partition: initialise its SWelsME, predict the MV from the MB cache, run
// pfMotionSearch[0], then publish the found MV to the cache so later partitions can
// predict from it. Returns the sum of the four partitions' uiSatdCost.
int32_t WelsMdP4x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice,
                    const int32_t ki8x8Idx) {
  SMbCache* pMbCache        = &pSlice->sMbCacheInfo;
  const int32_t kiEncStride = pCurDqLayer->iEncStride[0];
  const int32_t kiRefStride = pCurDqLayer->pRefPic->iLineSize[0];
  int32_t iTotalCost        = 0;

  for (int32_t iSub = 0; iSub < 4; ++iSub) {
    const int32_t kiPartIdx = (ki8x8Idx << 2) + iSub;
    // position of this partition inside the MB, first in 4x4 units, then in pixels
    const int32_t kiUnitX   = ((ki8x8Idx & 1) << 1) + (iSub & 1);
    const int32_t kiUnitY   = ((ki8x8Idx >> 1) << 1) + (iSub >> 1);
    const int32_t kiPixX    = (kiUnitX << 2);
    const int32_t kiPixY    = (kiUnitY << 2);
    const int32_t kiEncOffset = kiPixX + (kiPixY * kiEncStride);
    const int32_t kiRefOffset = kiPixX + (kiPixY * kiRefStride);
    SWelsME* pMe = &pWelsMd->sMe.sMe4x4[ki8x8Idx][iSub];

    InitMe (*pWelsMd, BLOCK_4x4,
            pMbCache->SPicData.pEncMb[0] + kiEncOffset,
            pMbCache->SPicData.pRefMb[0] + kiRefOffset,
            pCurDqLayer->pRefPic->pScreenBlockFeatureStorage,
            *pMe);

    // set here rather than inside InitMe so InitMe does not have to branch on the block mode
    pMe->iCurMeBlockPixX         = pWelsMd->iMbPixX + kiPixX;
    pMe->iCurMeBlockPixY         = pWelsMd->iMbPixY + kiPixY;
    pMe->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb >> 2;

    // single MV candidate: the base MV of this partition
    pSlice->sMvc[0]  = pMe->sMvBase;
    pSlice->uiMvcNum = 1;

    PredMv (&pMbCache->sMvComponents, kiPartIdx, 1, pWelsMd->uiRef, & (pMe->sMvp));
    pFunc->pfMotionSearch[0] (pFunc, pCurDqLayer, pMe, pSlice);
    UpdateP4x4Motion2Cache (pMbCache, kiPartIdx, pWelsMd->uiRef, & (pMe->sMv));

    iTotalCost += pMe->uiSatdCost;
  }
  return iTotalCost;
}
// Motion-search the two 8x4 partitions (upper, lower) of 8x8 block ki8x8Idx.
// For each partition: initialise its SWelsME, predict the MV from the MB cache, run
// pfMotionSearch[0], then publish the found MV to the cache so the next partition can
// predict from it. Returns the sum of both partitions' uiSatdCost.
int32_t WelsMdP8x4 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice,
                    const int32_t ki8x8Idx) {
  SMbCache* pMbCache        = &pSlice->sMbCacheInfo;
  const int32_t kiEncStride = pCurDqLayer->iEncStride[0];
  const int32_t kiRefStride = pCurDqLayer->pRefPic->iLineSize[0];
  int32_t iTotalCost        = 0;

  for (int32_t iSub = 0; iSub < 2; ++iSub) {
    // partitions start at sub-block 0 and sub-block 2 of the 8x8 block
    const int32_t kiPartIdx = (ki8x8Idx << 2) + (iSub << 1);
    // position of this partition inside the MB, first in 4x4 units, then in pixels
    const int32_t kiUnitX   = ((ki8x8Idx & 1) << 1);
    const int32_t kiUnitY   = ((ki8x8Idx >> 1) << 1) + iSub;
    const int32_t kiPixX    = (kiUnitX << 2);
    const int32_t kiPixY    = (kiUnitY << 2);
    const int32_t kiEncOffset = kiPixX + (kiPixY * kiEncStride);
    const int32_t kiRefOffset = kiPixX + (kiPixY * kiRefStride);
    SWelsME* pMe = &pWelsMd->sMe.sMe8x4[ki8x8Idx][iSub];

    InitMe (*pWelsMd, BLOCK_8x4,
            pMbCache->SPicData.pEncMb[0] + kiEncOffset,
            pMbCache->SPicData.pRefMb[0] + kiRefOffset,
            pCurDqLayer->pRefPic->pScreenBlockFeatureStorage,
            *pMe);

    // set here rather than inside InitMe so InitMe does not have to branch on the block mode
    pMe->iCurMeBlockPixX         = pWelsMd->iMbPixX + kiPixX;
    pMe->iCurMeBlockPixY         = pWelsMd->iMbPixY + kiPixY;
    pMe->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb >> 2;

    // single MV candidate: the base MV of this partition
    pSlice->sMvc[0]  = pMe->sMvBase;
    pSlice->uiMvcNum = 1;

    PredMv (&pMbCache->sMvComponents, kiPartIdx, 2, pWelsMd->uiRef, & (pMe->sMvp));
    pFunc->pfMotionSearch[0] (pFunc, pCurDqLayer, pMe, pSlice);
    UpdateP8x4Motion2Cache (pMbCache, kiPartIdx, pWelsMd->uiRef, & (pMe->sMv));

    iTotalCost += pMe->uiSatdCost;
  }
  return iTotalCost;
}
// Motion-search the two 4x8 partitions (left, right) of 8x8 block ki8x8Idx.
// For each partition: initialise its SWelsME, predict the MV from the MB cache, run
// pfMotionSearch[0], then publish the found MV to the cache so the next partition can
// predict from it. Returns the sum of both partitions' uiSatdCost.
// NOTE: the original author (Wayne) marked this function as still to be modified.
int32_t WelsMdP4x8 (SWelsFuncPtrList* pFunc, SDqLayer* pCurDqLayer, SWelsMD* pWelsMd, SSlice* pSlice,
                    const int32_t ki8x8Idx) {
  SMbCache* pMbCache        = &pSlice->sMbCacheInfo;
  const int32_t kiEncStride = pCurDqLayer->iEncStride[0];
  const int32_t kiRefStride = pCurDqLayer->pRefPic->iLineSize[0];
  int32_t iTotalCost        = 0;

  for (int32_t iSub = 0; iSub < 2; ++iSub) {
    // partitions start at sub-block 0 and sub-block 1 of the 8x8 block
    const int32_t kiPartIdx = (ki8x8Idx << 2) + iSub;
    // position of this partition inside the MB, first in 4x4 units, then in pixels
    const int32_t kiUnitX   = ((ki8x8Idx & 1) << 1) + iSub;
    const int32_t kiUnitY   = ((ki8x8Idx >> 1) << 1);
    const int32_t kiPixX    = (kiUnitX << 2);
    const int32_t kiPixY    = (kiUnitY << 2);
    const int32_t kiEncOffset = kiPixX + (kiPixY * kiEncStride);
    const int32_t kiRefOffset = kiPixX + (kiPixY * kiRefStride);
    SWelsME* pMe = &pWelsMd->sMe.sMe4x8[ki8x8Idx][iSub];

    InitMe (*pWelsMd, BLOCK_4x8,
            pMbCache->SPicData.pEncMb[0] + kiEncOffset,
            pMbCache->SPicData.pRefMb[0] + kiRefOffset,
            pCurDqLayer->pRefPic->pScreenBlockFeatureStorage,
            *pMe);

    // set here rather than inside InitMe so InitMe does not have to branch on the block mode
    pMe->iCurMeBlockPixX         = pWelsMd->iMbPixX + kiPixX;
    pMe->iCurMeBlockPixY         = pWelsMd->iMbPixY + kiPixY;
    pMe->uSadPredISatd.uiSadPred = pWelsMd->iSadPredMb >> 2;

    // single MV candidate: the base MV of this partition
    pSlice->sMvc[0]  = pMe->sMvBase;
    pSlice->uiMvcNum = 1;

    PredMv (&pMbCache->sMvComponents, kiPartIdx, 1, pWelsMd->uiRef, & (pMe->sMvp));
    pFunc->pfMotionSearch[0] (pFunc, pCurDqLayer, pMe, pSlice);
    UpdateP4x8Motion2Cache (pMbCache, kiPartIdx, pWelsMd->uiRef, & (pMe->sMv));

    iTotalCost += pMe->uiSatdCost;
  }
  return iTotalCost;
}
void WelsMdInterFinePartition (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, int32_t iBestCost) {
SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
// SMbCache *pMbCache = &pSlice->sMbCacheInfo;
@ -1129,6 +1247,7 @@ void WelsMdInterFinePartition (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* p
if (iCost < iBestCost) {
int32_t iCostPart;
pCurMb->uiMbType = MB_TYPE_8x8;
pCurMb->uiSubMbType[0] = pCurMb->uiSubMbType[1] = pCurMb->uiSubMbType[2] = pCurMb->uiSubMbType[3] = SUB_MB_TYPE_8x8;
// WelsLog( pEncCtx, WELS_LOG_INFO, "WelsMdP16x8, p_ref[0]= 0x%p", pMbCache->SPicData.pRefMb[0]);
iCostPart = WelsMdP16x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice);
@ -1190,6 +1309,7 @@ void WelsMdInterFinePartitionVaa (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice
if (iCostP8x8 < iBestCost) {
iBestCost = iCostP8x8;
pCurMb->uiMbType = MB_TYPE_8x8;
pCurMb->uiSubMbType[0] = pCurMb->uiSubMbType[1] = pCurMb->uiSubMbType[2] = pCurMb->uiSubMbType[3] = SUB_MB_TYPE_8x8;
}
break;
@ -1198,6 +1318,7 @@ void WelsMdInterFinePartitionVaa (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice
if (iCostP8x8 < iBestCost) {
iBestCost = iCostP8x8;
pCurMb->uiMbType = MB_TYPE_8x8;
pCurMb->uiSubMbType[0] = pCurMb->uiSubMbType[1] = pCurMb->uiSubMbType[2] = pCurMb->uiSubMbType[3] = SUB_MB_TYPE_8x8;
iCostP16x8 = WelsMdP16x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice);
if (iCostP16x8 <= iBestCost) {
@ -1426,6 +1547,32 @@ bool WelsMdPSkipEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCac
// Offset of each of the four 8x8 blocks, indexed by 8x8 block number; the value is handed
// to InitMeRefinePointer() when refining an 8x8 partition.
// NOTE(review): presumably WIDTH_* is a horizontal step and STRIDE_* a vertical step inside
// the ME refine buffer - confirm against the ME_REFINE_BUF_* definitions.
const int32_t g_kiPixStrideIdx8x8[4] = { 0, ME_REFINE_BUF_WIDTH_BLK8,
ME_REFINE_BUF_STRIDE_BLK8, ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8
};
// Offsets for sub-8x8 partitions, indexed as [8x8 block][4x4 sub-block]; the value is handed
// to InitMeRefinePointer() when refining 4x4 / 8x4 / 4x8 partitions.
// Each row starts from the same base offset as the corresponding g_kiPixStrideIdx8x8 entry
// and adds, per sub-block: nothing, WIDTH_BLK4, STRIDE_BLK4, WIDTH_BLK4 + STRIDE_BLK4.
const int32_t g_kiPixStrideIdx4x4[4][4] = {
{
0,
0 + ME_REFINE_BUF_WIDTH_BLK4,
0 + ME_REFINE_BUF_STRIDE_BLK4,
0 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4
}, //[0][]
{
ME_REFINE_BUF_WIDTH_BLK8,
ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4,
ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_STRIDE_BLK4,
ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4
}, //[1][]
{
ME_REFINE_BUF_STRIDE_BLK8,
ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK4,
ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_STRIDE_BLK4,
ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4
}, //[2][]
{
ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8,
ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4,
ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_STRIDE_BLK4,
ME_REFINE_BUF_STRIDE_BLK8 + ME_REFINE_BUF_WIDTH_BLK8 + ME_REFINE_BUF_WIDTH_BLK4 + ME_REFINE_BUF_STRIDE_BLK4
} //[3][]
};
void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache) {
SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
@ -1436,7 +1583,7 @@ void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurM
int32_t iBestSadCost = 0, iBestSatdCost = 0;
SMeRefinePointer sMeRefine;
int32_t i, iIdx, iPixStride;
int32_t i, j, iIdx, iPixStride;
uint8_t* pRefCb = pMbCache->SPicData.pRefMb[1];
uint8_t* pRefCr = pMbCache->SPicData.pRefMb[2];
@ -1536,40 +1683,141 @@ void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurM
}
break;
case MB_TYPE_8x8:
sMeRefine.pfCopyBlockByMode = pFunc->pfCopy8x8Aligned;
pMbCache->sMvComponents.iRefIndexCache [9] = pMbCache->sMvComponents.iRefIndexCache [21] = REF_NOT_AVAIL;
for (i = 0; i < 4; i++) {
int32_t iBlk8Idx = i << 2; //0, 4, 8, 12
int32_t iBlk4X, iBlk4Y;
int32_t iBlk4X, iBlk4Y, iBlk4x4Idx;
pCurMb->pRefIndex[i] = pWelsMd->uiRef;
switch (pCurMb->uiSubMbType[i]) {
case SUB_MB_TYPE_8x8:
sMeRefine.pfCopyBlockByMode = pFunc->pfCopy8x8Aligned;
//luma
InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx8x8[i]);
PredMv (&pMbCache->sMvComponents, iBlk8Idx, 2, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x8[i].sMvp);
MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk8Idx], &pWelsMd->sMe.sMe8x8[i], &sMeRefine, 8, 8);
UpdateP8x8MotionInfo (pMbCache, pCurMb, iBlk8Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x8[i].sMv);
pMbCache->sMbMvp[g_kuiMbCountScan4Idx[iBlk8Idx]] = pWelsMd->sMe.sMe8x8[i].sMvp;
iBestSadCost += pWelsMd->sMe.sMe8x8[i].uiSadCost;
iBestSatdCost += pWelsMd->sMe.sMe8x8[i].uiSatdCost;
//luma
InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx8x8[i]);
PredMv (&pMbCache->sMvComponents, iBlk8Idx, 2, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x8[i].sMvp);
MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk8Idx], &pWelsMd->sMe.sMe8x8[i], &sMeRefine, 8, 8);
UpdateP8x8MotionInfo (pMbCache, pCurMb, iBlk8Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x8[i].sMv);
pMbCache->sMbMvp[i] = pWelsMd->sMe.sMe8x8[i].sMvp;
iBestSadCost += pWelsMd->sMe.sMe8x8[i].uiSadCost;
iBestSatdCost += pWelsMd->sMe.sMe8x8[i].uiSatdCost;
//chroma
pMv = &pWelsMd->sMe.sMe8x8[i].sMv;
iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3);
//chroma
pMv = &pWelsMd->sMe.sMe8x8[i].sMv;
iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3);
iBlk4X = (i & 1) << 2;
iBlk4Y = (i >> 1) << 2;
iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X;
iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X;
iBlk4X = (i & 1) << 2;
iBlk4Y = (i >> 1) << 2;
iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X;
iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X;
pTmpRefCb = pRefCb + iRefBlk4Stride;
pTmpDstCb = pDstCb + iDstBlk4Stride;
pTmpRefCr = pRefCr + iRefBlk4Stride;
pTmpDstCr = pDstCr + iDstBlk4Stride;
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY,
4, 4); //Cb
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY,
4, 4); //Cr
break;
case SUB_MB_TYPE_4x4:
sMeRefine.pfCopyBlockByMode = pFunc->pfCopy4x4;
//luma
for (j = 0; j < 4; ++j) {
iBlk4x4Idx = iBlk8Idx + j;
InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx4x4[i][j]);
PredMv (&pMbCache->sMvComponents, iBlk4x4Idx, 1, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x4[i][j].sMvp);
MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk4x4Idx], &pWelsMd->sMe.sMe4x4[i][j], &sMeRefine, 4, 4);
UpdateP4x4MotionInfo (pMbCache, pCurMb, iBlk4x4Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x4[i][j].sMv);
pMbCache->sMbMvp[g_kuiMbCountScan4Idx[iBlk4x4Idx]] = pWelsMd->sMe.sMe4x4[i][j].sMvp;
iBestSadCost += pWelsMd->sMe.sMe4x4[i][j].uiSadCost;
iBestSatdCost += pWelsMd->sMe.sMe4x4[i][j].uiSatdCost;
pTmpRefCb = pRefCb + iRefBlk4Stride;
pTmpDstCb = pDstCb + iDstBlk4Stride;
pTmpRefCr = pRefCr + iRefBlk4Stride;
pTmpDstCr = pDstCr + iDstBlk4Stride;
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY,
4, 4); //Cb
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY,
4, 4); //Cr
//chroma
pMv = &pWelsMd->sMe.sMe4x4[i][j].sMv;
iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3);
iBlk4X = (((i & 1) << 1) + (j & 1)) << 1;
iBlk4Y = (((i >> 1) << 1) + (j >> 1)) << 1;
iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X;
iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X;
pTmpRefCb = pRefCb + iRefBlk4Stride;
pTmpDstCb = pDstCb + iDstBlk4Stride;
pTmpRefCr = pRefCr + iRefBlk4Stride;
pTmpDstCr = pDstCr + iDstBlk4Stride;
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY,
2, 2); //Cb
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY,
2, 2); //Cr
}
break;
case SUB_MB_TYPE_8x4:
sMeRefine.pfCopyBlockByMode = pFunc->pfCopy8x4;
//luma
for (j = 0; j < 2; ++j) {
iBlk4x4Idx = iBlk8Idx + (j << 1);
InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx4x4[i][j << 1]);
PredMv (&pMbCache->sMvComponents, iBlk4x4Idx, 2, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x4[i][j].sMvp);
MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk4x4Idx], &pWelsMd->sMe.sMe8x4[i][j], &sMeRefine, 8, 4);
UpdateP8x4MotionInfo (pMbCache, pCurMb, iBlk4x4Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe8x4[i][j].sMv);
pMbCache->sMbMvp[g_kuiMbCountScan4Idx[ iBlk4x4Idx]] = pWelsMd->sMe.sMe8x4[i][j].sMvp;
//pMbCache->sMbMvp[g_kuiMbCountScan4Idx[1 + iBlk4x4Idx]] = pWelsMd->sMe.sMe8x4[i][j].sMvp;
iBestSadCost += pWelsMd->sMe.sMe8x4[i][j].uiSadCost;
iBestSatdCost += pWelsMd->sMe.sMe8x4[i][j].uiSatdCost;
//chroma
pMv = &pWelsMd->sMe.sMe8x4[i][j].sMv;
iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3);
iBlk4X = ((i & 1) << 1) << 1;
iBlk4Y = (((i >> 1) << 1) + j) << 1;
iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X;
iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X;
pTmpRefCb = pRefCb + iRefBlk4Stride;
pTmpDstCb = pDstCb + iDstBlk4Stride;
pTmpRefCr = pRefCr + iRefBlk4Stride;
pTmpDstCr = pDstCr + iDstBlk4Stride;
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY,
4, 2); //Cb
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY,
4, 2); //Cr
}
break;
case SUB_MB_TYPE_4x8:
sMeRefine.pfCopyBlockByMode = pFunc->pfCopy4x8;
//luma
for (j = 0; j < 2; ++j) {
iBlk4x4Idx = iBlk8Idx + j;
InitMeRefinePointer (&sMeRefine, pMbCache, g_kiPixStrideIdx4x4[i][j]);
PredMv (&pMbCache->sMvComponents, iBlk4x4Idx, 1, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x8[i][j].sMvp);
MeRefineFracPixel (pEncCtx, pDstLuma + g_kuiSmb4AddrIn256[iBlk4x4Idx], &pWelsMd->sMe.sMe4x8[i][j], &sMeRefine, 4, 8);
UpdateP4x8MotionInfo (pMbCache, pCurMb, iBlk4x4Idx, pWelsMd->uiRef, &pWelsMd->sMe.sMe4x8[i][j].sMv);
pMbCache->sMbMvp[g_kuiMbCountScan4Idx[ iBlk4x4Idx]] = pWelsMd->sMe.sMe4x8[i][j].sMvp;
//pMbCache->sMbMvp[g_kuiMbCountScan4Idx[4 + iBlk4x4Idx]] = pWelsMd->sMe.sMe8x4[i][j].sMvp;
iBestSadCost += pWelsMd->sMe.sMe4x8[i][j].uiSadCost;
iBestSatdCost += pWelsMd->sMe.sMe4x8[i][j].uiSatdCost;
//chroma
pMv = &pWelsMd->sMe.sMe4x8[i][j].sMv;
iMvStride = (pMv->iMvY >> 3) * iLineSizeRefUV + (pMv->iMvX >> 3);
iBlk4X = (((i & 1) << 1) + j) << 1;
iBlk4Y = (((i >> 1) << 1)) << 1;
iRefBlk4Stride = iBlk4Y * iLineSizeRefUV + iBlk4X;
iDstBlk4Stride = (iBlk4Y << 3) + iBlk4X;
pTmpRefCb = pRefCb + iRefBlk4Stride;
pTmpDstCb = pDstCb + iDstBlk4Stride;
pTmpRefCr = pRefCr + iRefBlk4Stride;
pTmpDstCr = pDstCr + iDstBlk4Stride;
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY,
2, 4); //Cb
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY,
2, 4); //Cr
}
break;
}
}
break;
default:

View File

@ -532,7 +532,8 @@ bool WelsMdInterJudgeSCDPskip (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* s
return false;
}
bool WelsMdInterJudgeSCDPskipFalse (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* slice, SMB* pCurMb, SMbCache* pMbCache) {
bool WelsMdInterJudgeSCDPskipFalse (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* slice, SMB* pCurMb,
SMbCache* pMbCache) {
return false;
}
@ -606,7 +607,8 @@ bool TryModeMerge (SMbCache* pMbCache, SWelsMD* pWelsMd, SMB* pCurMb) {
}
void WelsMdInterFinePartitionVaaOnScreen (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, int32_t iBestCost) {
void WelsMdInterFinePartitionVaaOnScreen (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb,
int32_t iBestCost) {
SMbCache* pMbCache = &pSlice->sMbCacheInfo;
SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
int32_t iCostP8x8;
@ -620,8 +622,37 @@ void WelsMdInterFinePartitionVaaOnScreen (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd
if (iCostP8x8 < iBestCost) {
iBestCost = iCostP8x8;
pCurMb->uiMbType = MB_TYPE_8x8;
TryModeMerge (pMbCache, pWelsMd, pCurMb);
pCurMb->uiSubMbType[0] = pCurMb->uiSubMbType[1] = pCurMb->uiSubMbType[2] = pCurMb->uiSubMbType[3] = SUB_MB_TYPE_8x8;
#if 0 //Disable for sub8x8 modes for now
iBestCost = 0;
//reset neighbor info for sub8x8
pMbCache->sMvComponents.iRefIndexCache [9] = pMbCache->sMvComponents.iRefIndexCache [21] = REF_NOT_AVAIL;
for (int32_t i8x8Idx = 0; i8x8Idx < 4; ++i8x8Idx) {
int32_t iCurCostSub8x8, iBestCostSub8x8 = pWelsMd->sMe.sMe8x8[i8x8Idx].uiSatdCost;
//4x4
iCurCostSub8x8 = WelsMdP4x4 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice, i8x8Idx);
if (iCurCostSub8x8 < iBestCostSub8x8) {
pCurMb->uiSubMbType[i8x8Idx] = SUB_MB_TYPE_4x4;
iBestCostSub8x8 = iCurCostSub8x8;
}
//8x4
iCurCostSub8x8 = WelsMdP8x4 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice, i8x8Idx);
if (iCurCostSub8x8 < iBestCostSub8x8) {
pCurMb->uiSubMbType[i8x8Idx] = SUB_MB_TYPE_8x4;
iBestCostSub8x8 = iCurCostSub8x8;
}
//4x8
iCurCostSub8x8 = WelsMdP4x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice, i8x8Idx);
if (iCurCostSub8x8 < iBestCostSub8x8) {
pCurMb->uiSubMbType[i8x8Idx] = SUB_MB_TYPE_4x8;
iBestCostSub8x8 = iCurCostSub8x8;
}
iBestCost += iBestCostSub8x8;
}
if ((pCurMb->uiSubMbType[0] == SUB_MB_TYPE_8x8) && (pCurMb->uiSubMbType[1] == SUB_MB_TYPE_8x8)
&& (pCurMb->uiSubMbType[2] == SUB_MB_TYPE_8x8) && (pCurMb->uiSubMbType[3] == SUB_MB_TYPE_8x8)) //all 8x8
#endif
TryModeMerge (pMbCache, pWelsMd, pCurMb);
}
pWelsMd->iCostLuma = iBestCost;
}

View File

@ -271,8 +271,7 @@ void WelsMbSkipCabac (SCabacCtx* pCabacCtx, SMB* pCurMb, int32_t iMbWidth, EWels
WelsCabacEncodeDecision (pCabacCtx, iCtx, bSkipFlag);
if (bSkipFlag) {
for (int i = 0; i < 4; i++) {
for (int i = 0; i < 16; i++) {
pCurMb->sMvd[i].iMvX = 0;
pCurMb->sMvd[i].iMvY = 0;
}
@ -338,7 +337,7 @@ inline void WelsCabacMbMvdLx (SCabacCtx* pCabacCtx, int32_t sMvd, int32_t iCtx,
}
}
SMVUnitXY WelsCabacMbMvd (SCabacCtx* pCabacCtx, SMB* pCurMb, uint32_t iMbWidth,
SMVUnitXY sCurMv, SMVUnitXY sPredMv, int16_t iBlockIdx) {
SMVUnitXY sCurMv, SMVUnitXY sPredMv, int16_t i4x4ScanIdx) {
uint32_t iAbsMvd0, iAbsMvd1;
uint8_t uiNeighborAvail = pCurMb->uiNeighborAvail;
SMVUnitXY sMvd;
@ -347,18 +346,15 @@ SMVUnitXY WelsCabacMbMvd (SCabacCtx* pCabacCtx, SMB* pCurMb, uint32_t iMbWidth,
sMvdLeft.iMvX = sMvdLeft.iMvY = sMvdTop.iMvX = sMvdTop.iMvY = 0;
sMvd.sDeltaMv (sCurMv, sPredMv);
if (((iBlockIdx == 0) || (iBlockIdx == 1)) && (uiNeighborAvail & TOP_MB_POS)) {
sMvdTop.sAssginMv ((pCurMb - iMbWidth)->sMvd[iBlockIdx + 2]);
if ((i4x4ScanIdx < 4) && (uiNeighborAvail & TOP_MB_POS)) { //top row blocks
sMvdTop.sAssginMv ((pCurMb - iMbWidth)->sMvd[i4x4ScanIdx + 12]);
} else if (i4x4ScanIdx >= 4) {
sMvdTop.sAssginMv (pCurMb->sMvd[i4x4ScanIdx - 4]);
}
if ((iBlockIdx == 2) || (iBlockIdx == 3)) {
sMvdTop.sAssginMv (pCurMb->sMvd[iBlockIdx - 2]);
}
if (((iBlockIdx == 0) || (iBlockIdx == 2)) && (uiNeighborAvail & LEFT_MB_POS)) {
sMvdLeft.sAssginMv ((pCurMb - 1)->sMvd[iBlockIdx + 1]);
}
if ((iBlockIdx == 1) || (iBlockIdx == 3)) {
sMvdLeft.sAssginMv (pCurMb->sMvd[iBlockIdx - 1]);
if ((! (i4x4ScanIdx & 0x03)) && (uiNeighborAvail & LEFT_MB_POS)) { //left column blocks
sMvdLeft.sAssginMv ((pCurMb - 1)->sMvd[i4x4ScanIdx + 3]);
} else if (i4x4ScanIdx & 0x03) {
sMvdLeft.sAssginMv (pCurMb->sMvd[i4x4ScanIdx - 1]);
}
iAbsMvd0 = WELS_ABS (sMvdLeft.iMvX) + WELS_ABS (sMvdTop.iMvX);
@ -368,6 +364,62 @@ SMVUnitXY WelsCabacMbMvd (SCabacCtx* pCabacCtx, SMB* pCurMb, uint32_t iMbWidth,
WelsCabacMbMvdLx (pCabacCtx, sMvd.iMvY, 47, iAbsMvd1);
return sMvd;
}
// Emit the sub-MB type of each of the four 8x8 blocks as CABAC decisions on contexts 21..23:
//   SUB_MB_TYPE_8x8 -> 1
//   SUB_MB_TYPE_8x4 -> 0 0
//   SUB_MB_TYPE_4x8 -> 0 1 1
//   anything else   -> 0 1 0
static void WelsCabacSubMbType (SCabacCtx* pCabacCtx, SMB* pCurMb) {
  for (int32_t iBlk8 = 0; iBlk8 < 4; ++iBlk8) {
    const uint32_t kuiType = pCurMb->uiSubMbType[iBlk8];

    if (SUB_MB_TYPE_8x8 != kuiType) {
      WelsCabacEncodeDecision (pCabacCtx, 21, 0);
      if (SUB_MB_TYPE_8x4 != kuiType) {
        WelsCabacEncodeDecision (pCabacCtx, 22, 1);
        // last bin separates 4x8 (1) from the remaining type (0)
        WelsCabacEncodeDecision (pCabacCtx, 23, SUB_MB_TYPE_4x8 == kuiType);
      } else {
        WelsCabacEncodeDecision (pCabacCtx, 22, 0);
      }
    } else {
      WelsCabacEncodeDecision (pCabacCtx, 21, 1);
    }
  }
}
// Encode the MV differences of every sub-partition of an 8x8-partitioned MB and record each
// difference in pCurMb->sMvd for all 4x4 slots the partition covers.
// The record is written right after each partition is coded, before the next one is coded,
// because WelsCabacMbMvd() is handed pCurMb and the same order must be kept.
// 8x8 blocks whose sub type is none of 8x8 / 4x4 / 8x4 / 4x8 are skipped.
static void WelsCabacSubMbMvd (SCabacCtx* pCabacCtx, SMB* pCurMb, SMbCache* pMbCache, const int kiMbWidth) {
  SMVUnitXY sMvd;

  for (int32_t iBlk8 = 0; iBlk8 < 4; ++iBlk8) {
    const uint32_t kuiType = pCurMb->uiSubMbType[iBlk8];
    int32_t iPartCount; // number of partitions inside this 8x8 block
    int32_t iPartStep;  // distance between partition start indices (before the scan table)
    int32_t iCols;      // 4x4 slots covered per partition, along consecutive indices
    int32_t iRows;      // 4x4 slots covered per partition, in steps of 4 indices

    if (SUB_MB_TYPE_8x8 == kuiType) {
      iPartCount = 1; iPartStep = 0; iCols = 2; iRows = 2;
    } else if (SUB_MB_TYPE_4x4 == kuiType) {
      iPartCount = 4; iPartStep = 1; iCols = 1; iRows = 1;
    } else if (SUB_MB_TYPE_8x4 == kuiType) {
      iPartCount = 2; iPartStep = 2; iCols = 2; iRows = 1;
    } else if (SUB_MB_TYPE_4x8 == kuiType) {
      iPartCount = 2; iPartStep = 1; iCols = 1; iRows = 2;
    } else {
      continue;
    }

    for (int32_t iPart = 0; iPart < iPartCount; ++iPart) {
      const int32_t kiScanIdx = g_kuiMbCountScan4Idx[ (iBlk8 << 2) + iPart * iPartStep];
      sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, kiMbWidth, pCurMb->sMv[kiScanIdx], pMbCache->sMbMvp[kiScanIdx],
                             kiScanIdx);
      // replicate the difference over the covered slots: +0, +1 across, +4 per row
      for (int32_t iRow = 0; iRow < iRows; ++iRow) {
        for (int32_t iCol = 0; iCol < iCols; ++iCol) {
          pCurMb->sMvd[ (iRow << 2) + iCol + kiScanIdx].sAssginMv (sMvd);
        }
      }
    }
  }
}
int16_t WelsGetMbCtxCabac (SMbCache* pMbCache, SMB* pCurMb, uint32_t iMbWidth, ECtxBlockCat eCtxBlockCat,
int16_t iIdx) {
@ -610,10 +662,9 @@ int32_t WelsSpatialWriteMbSynCabac (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* p
}
WelsCabacMbIntraChromaPredMode (pCabacCtx, pCurMb, pMbCache, iMbWidth);
sMvd.iMvX = sMvd.iMvY = 0;
pCurMb->sMvd[0].sAssginMv (sMvd);
pCurMb->sMvd[1].sAssginMv (sMvd);
pCurMb->sMvd[2].sAssginMv (sMvd);
pCurMb->sMvd[3].sAssginMv (sMvd);
for (i = 0; i < 16; ++i) {
pCurMb->sMvd[i].sAssginMv (sMvd);
}
} else if (uiMbType == MB_TYPE_16x16) {
@ -622,10 +673,9 @@ int32_t WelsSpatialWriteMbSynCabac (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* p
}
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[0], pMbCache->sMbMvp[0], 0);
pCurMb->sMvd[0].sAssginMv (sMvd);
pCurMb->sMvd[1].sAssginMv (sMvd);
pCurMb->sMvd[2].sAssginMv (sMvd);
pCurMb->sMvd[3].sAssginMv (sMvd);
for (i = 0; i < 16; ++i) {
pCurMb->sMvd[i].sAssginMv (sMvd);
}
} else if (uiMbType == MB_TYPE_16x8) {
if (uiNumRefIdxL0Active > 0) {
@ -633,30 +683,31 @@ int32_t WelsSpatialWriteMbSynCabac (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* p
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 12);
}
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth , pCurMb->sMv[0], pMbCache->sMbMvp[0], 0);
pCurMb->sMvd[0].sAssginMv (sMvd);
pCurMb->sMvd[1].sAssginMv (sMvd);
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[8], pMbCache->sMbMvp[1], 2);
pCurMb->sMvd[2].sAssginMv (sMvd);
pCurMb->sMvd[3].sAssginMv (sMvd);
for (i = 0; i < 8; ++i) {
pCurMb->sMvd[i].sAssginMv (sMvd);
}
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[8], pMbCache->sMbMvp[1], 8);
for (i = 8; i < 16; ++i) {
pCurMb->sMvd[i].sAssginMv (sMvd);
}
} else if (uiMbType == MB_TYPE_8x16) {
if (uiNumRefIdxL0Active > 0) {
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 0);
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 2);
}
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[0], pMbCache->sMbMvp[0], 0);
pCurMb->sMvd[0].sAssginMv (sMvd);
pCurMb->sMvd[2].sAssginMv (sMvd);
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[2], pMbCache->sMbMvp[1], 1);
pCurMb->sMvd[1].sAssginMv (sMvd);
pCurMb->sMvd[3].sAssginMv (sMvd);
for (i = 0; i < 16; i += 4) {
pCurMb->sMvd[i ].sAssginMv (sMvd);
pCurMb->sMvd[i + 1].sAssginMv (sMvd);
}
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[2], pMbCache->sMbMvp[1], 2);
for (i = 0; i < 16; i += 4) {
pCurMb->sMvd[i + 2].sAssginMv (sMvd);
pCurMb->sMvd[i + 3].sAssginMv (sMvd);
}
} else if ((uiMbType == MB_TYPE_8x8) || (uiMbType == MB_TYPE_8x8_REF0)) {
for (i = 0; i < 4; i++)
WelsCabacEncodeDecision (pCabacCtx, 21, 1);
//write sub_mb_type
WelsCabacSubMbType (pCabacCtx, pCurMb);
if (uiNumRefIdxL0Active > 0) {
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 0);
@ -664,19 +715,8 @@ int32_t WelsSpatialWriteMbSynCabac (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* p
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 12);
WelsCabacMbRef (pCabacCtx, pCurMb, pMbCache, 14);
}
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[0], pMbCache->sMbMvp[0], 0);
pCurMb->sMvd[0].sAssginMv (sMvd);
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[2], pMbCache->sMbMvp[1], 1);
pCurMb->sMvd[1].sAssginMv (sMvd);
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[8], pMbCache->sMbMvp[2], 2);
pCurMb->sMvd[2].sAssginMv (sMvd);
sMvd = WelsCabacMbMvd (pCabacCtx, pCurMb, iMbWidth, pCurMb->sMv[10], pMbCache->sMbMvp[3], 3);
pCurMb->sMvd[3].sAssginMv (sMvd);
//write sub8x8 mvd
WelsCabacSubMbMvd (pCabacCtx, pCurMb, pMbCache, iMbWidth);
}
if (uiMbType != MB_TYPE_INTRA16x16) {
WelsCabacMbCbp (pCurMb, iMbWidth, pCabacCtx);

View File

@ -190,7 +190,22 @@ void WelsSpatialWriteSubMbPred (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurM
//step 1: write sub_mb_type for each of the four 8x8 blocks (ue(v) code: 8x8 = 0, 8x4 = 1, 4x8 = 2, 4x4 = 3)
for (i = 0; i < 4; i++) {
BsWriteUE (pBs, 0);
switch (pCurMb->uiSubMbType[i]) {
case SUB_MB_TYPE_8x8:
BsWriteUE (pBs, 0);
break;
case SUB_MB_TYPE_8x4:
BsWriteUE (pBs, 1);
break;
case SUB_MB_TYPE_4x8:
BsWriteUE (pBs, 2);
break;
case SUB_MB_TYPE_4x4:
BsWriteUE (pBs, 3);
break;
default: //should not enter
break;
}
}
//step 2: get and write uiRefIndex and sMvd
@ -202,8 +217,30 @@ void WelsSpatialWriteSubMbPred (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurM
}
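//write sMvd (mv minus its predictor) for every sub-partition of each 8x8 block: 1 pair for 8x8, 2 for 8x4/4x8, 4 for 4x4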
for (i = 0; i < 4; i++) {
BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[i].iMvX);
BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[i].iMvY);
uint32_t uiSubMbType = pCurMb->uiSubMbType[i];
if (SUB_MB_TYPE_8x8 == uiSubMbType) {
BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX);
BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY);
} else if (SUB_MB_TYPE_4x4 == uiSubMbType) {
BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX);
BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY);
BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvX);
BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvY);
BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 2)].iMvX);
BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 2)].iMvY);
BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 3)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 3)].iMvX);
BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 3)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 3)].iMvY);
} else if (SUB_MB_TYPE_8x4 == uiSubMbType) {
BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX);
BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY);
BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 2)].iMvX);
BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 2)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 2)].iMvY);
} else if (SUB_MB_TYPE_4x8 == uiSubMbType) {
BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvX - pMbCache->sMbMvp[*kpScan4].iMvX);
BsWriteSE (pBs, pCurMb->sMv[*kpScan4].iMvY - pMbCache->sMbMvp[*kpScan4].iMvY);
BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvX - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvX);
BsWriteSE (pBs, pCurMb->sMv[* (kpScan4 + 1)].iMvY - pMbCache->sMbMvp[* (kpScan4 + 1)].iMvY);
}
kpScan4 += 4;
}
}