Merge pull request #1299 from ruil2/mvcost_check_1
Fix crash in MVD cost calculation
This commit is contained in:
commit
a4eea4c64d
@ -200,9 +200,9 @@ bool CheckDirectionalMvFalse (PSampleSadSatdCostFunc pSad, void* vpMe,
|
|||||||
|
|
||||||
// Cross Search Basics
|
// Cross Search Basics
|
||||||
void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
||||||
uint16_t* pMvdTable, const int32_t kiFixedMvd,
|
uint16_t* pMvdTable,
|
||||||
const int32_t kiEncStride, const int32_t kiRefStride,
|
const int32_t kiEncStride, const int32_t kiRefStride,
|
||||||
const int32_t kiMinPos, const int32_t kiMaxPos,
|
const int16_t kiMinMv, const int16_t kiMaxMv,
|
||||||
const bool bVerticalSearch);
|
const bool bVerticalSearch);
|
||||||
#ifdef X86_ASM
|
#ifdef X86_ASM
|
||||||
extern "C"
|
extern "C"
|
||||||
@ -212,14 +212,14 @@ uint32_t SampleSad16x16Hor8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, uint16_
|
|||||||
}
|
}
|
||||||
|
|
||||||
void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
||||||
uint16_t* pMvdTable, const int32_t kiFixedMvd,
|
uint16_t* pMvdTable,
|
||||||
const int32_t kiEncStride, const int32_t kiRefStride,
|
const int32_t kiEncStride, const int32_t kiRefStride,
|
||||||
const int32_t kiMinPos, const int32_t kiMaxPos,
|
const int16_t kiMinMv, const int16_t kiMaxMv,
|
||||||
const bool bVerticalSearch);
|
const bool bVerticalSearch);
|
||||||
void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
||||||
uint16_t* pMvdTable, const int32_t kiFixedMvd,
|
uint16_t* pMvdTable,
|
||||||
const int32_t kiEncStride, const int32_t kiRefStride,
|
const int32_t kiEncStride, const int32_t kiRefStride,
|
||||||
const int32_t kiMinPos, const int32_t kiMaxPos,
|
const int16_t kiMinMv, const int16_t kiMaxMv,
|
||||||
const bool bVerticalSearch);
|
const bool bVerticalSearch);
|
||||||
#endif
|
#endif
|
||||||
void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
|
void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
|
||||||
|
@ -155,9 +155,9 @@ typedef bool (*PCheckDirectionalMv) (PSampleSadSatdCostFunc pSad, void* vpMe,
|
|||||||
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
|
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
|
||||||
int32_t& iBestSadCost);
|
int32_t& iBestSadCost);
|
||||||
typedef void (*PLineFullSearchFunc) (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
typedef void (*PLineFullSearchFunc) (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
||||||
uint16_t* pMvdTable, const int32_t kiFixedMvd,
|
uint16_t* pMvdTable,
|
||||||
const int32_t kiEncStride, const int32_t kiRefStride,
|
const int32_t kiEncStride, const int32_t kiRefStride,
|
||||||
const int32_t kiMinPos, const int32_t kiMaxPos,
|
const int16_t kiMinMv, const int16_t kiMaxMv,
|
||||||
const bool bVerticalSearch);
|
const bool bVerticalSearch);
|
||||||
typedef void (*PInitializeHashforFeatureFunc) (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
|
typedef void (*PInitializeHashforFeatureFunc) (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
|
||||||
uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
|
uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
|
||||||
|
@ -422,13 +422,23 @@ void CalcMvdCostx8_c (uint16_t* pMvdCost, const int32_t kiStartMv, uint16_t* pMv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
||||||
uint16_t* pMvdTable, const int32_t kiFixedMvd,
|
uint16_t* pMvdTable,
|
||||||
const int32_t kiEncStride, const int32_t kiRefStride,
|
const int32_t kiEncStride, const int32_t kiRefStride,
|
||||||
const int32_t kiMinPos, const int32_t kiMaxPos,
|
const int16_t kiMinMv, const int16_t kiMaxMv,
|
||||||
const bool bVerticalSearch) {
|
const bool bVerticalSearch) {
|
||||||
uint8_t* kpEncMb = pMe->pEncMb;
|
uint8_t* kpEncMb = pMe->pEncMb;
|
||||||
const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
|
const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
|
||||||
uint8_t* pRef = &pMe->pColoRefMb[ (kiMinPos - kiCurMeBlockPix) * kiRefStride];
|
uint8_t* pRef = &pMe->pColoRefMb[kiMinMv * kiRefStride];
|
||||||
|
|
||||||
|
const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
|
||||||
|
|
||||||
|
int32_t iMinPos = kiCurMeBlockPixY + kiMinMv;
|
||||||
|
int32_t iMaxPos = kiCurMeBlockPixY + kiMaxMv;
|
||||||
|
int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
|
||||||
|
uint16_t* pMvdCost = & (pMvdTable[ (kiMinMv << 2) - pMe->sMvp.iMvY]);
|
||||||
|
int16_t iStartMv = 0;
|
||||||
|
|
||||||
|
|
||||||
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
|
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
|
||||||
const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
|
const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
|
||||||
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
|
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
|
||||||
@ -438,7 +448,7 @@ void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
|||||||
PTransposeMatrixBlocksFunc TransposeMatrixBlocks = kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 :
|
PTransposeMatrixBlocksFunc TransposeMatrixBlocks = kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 :
|
||||||
TransposeMatrixBlocksx8_mmx;
|
TransposeMatrixBlocksx8_mmx;
|
||||||
|
|
||||||
const int32_t kiDiff = kiMaxPos - kiMinPos;
|
const int32_t kiDiff = iMaxPos - iMinPos;
|
||||||
const int32_t kiRowNum = WELS_ALIGN ((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
|
const int32_t kiRowNum = WELS_ALIGN ((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
|
||||||
const int32_t kiBlocksNum = kIsBlock16x16 ? (kiRowNum >> 4) : (kiRowNum >> 3);
|
const int32_t kiBlocksNum = kIsBlock16x16 ? (kiRowNum >> 4) : (kiRowNum >> 3);
|
||||||
int32_t iCountLoop8 = (kiRowNum - kiEdgeBlocks) >> 3;
|
int32_t iCountLoop8 = (kiRowNum - kiEdgeBlocks) >> 3;
|
||||||
@ -451,7 +461,7 @@ void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
|||||||
TransposeMatrixBlock (&uiMatrixEnc[0][0], 16, kpEncMb, kiEncStride);
|
TransposeMatrixBlock (&uiMatrixEnc[0][0], 16, kpEncMb, kiEncStride);
|
||||||
TransposeMatrixBlocks (&uiMatrixRef[0][0], kiMatrixStride, pRef, kiRefStride, kiBlocksNum);
|
TransposeMatrixBlocks (&uiMatrixRef[0][0], kiMatrixStride, pRef, kiRefStride, kiBlocksNum);
|
||||||
ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
|
ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
|
||||||
int32_t iTargetPos = kiMinPos;
|
int32_t iTargetPos = iMinPos;
|
||||||
int16_t iBestPos = pMe->sMv.iMvX;
|
int16_t iBestPos = pMe->sMv.iMvX;
|
||||||
uint32_t uiBestCost = pMe->uiSadCost;
|
uint32_t uiBestCost = pMe->uiSadCost;
|
||||||
uint32_t uiCostMin;
|
uint32_t uiCostMin;
|
||||||
@ -460,7 +470,7 @@ void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
|||||||
pRef = &uiMatrixRef[0][0];
|
pRef = &uiMatrixRef[0][0];
|
||||||
|
|
||||||
while (iCountLoop8 > 0) {
|
while (iCountLoop8 > 0) {
|
||||||
CalcMvdCostx8_c (uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
|
CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
|
||||||
uiCostMin = pSampleSadHor8 (kpEncMb, 16, pRef, kiMatrixStride, uiBaseCost, &iIndexMinPos);
|
uiCostMin = pSampleSadHor8 (kpEncMb, 16, pRef, kiMatrixStride, uiBaseCost, &iIndexMinPos);
|
||||||
if (uiCostMin < uiBestCost) {
|
if (uiCostMin < uiBestCost) {
|
||||||
uiBestCost = uiCostMin;
|
uiBestCost = uiCostMin;
|
||||||
@ -468,18 +478,20 @@ void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
|||||||
}
|
}
|
||||||
iTargetPos += 8;
|
iTargetPos += 8;
|
||||||
pRef += 8;
|
pRef += 8;
|
||||||
|
iStartMv += 8;
|
||||||
-- iCountLoop8;
|
-- iCountLoop8;
|
||||||
}
|
}
|
||||||
if (kiRemainingVectors > 0) {
|
if (kiRemainingVectors > 0) {
|
||||||
kpEncMb = pMe->pEncMb;
|
kpEncMb = pMe->pEncMb;
|
||||||
pRef = &pMe->pColoRefMb[ (iTargetPos - kiCurMeBlockPix) * kiRefStride];
|
pRef = &pMe->pColoRefMb[ (iTargetPos - kiCurMeBlockPix) * kiRefStride];
|
||||||
while (iTargetPos < kiMaxPos) {
|
while (iTargetPos < iMaxPos) {
|
||||||
const uint16_t pMvdCost = pMvdTable[iTargetPos << 2];
|
const uint16_t uiMvdCost = pMvdCost[iStartMv << 2];
|
||||||
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + pMvdCost);
|
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
|
||||||
if (uiSadCost < uiBestCost) {
|
if (uiSadCost < uiBestCost) {
|
||||||
uiBestCost = uiSadCost;
|
uiBestCost = uiSadCost;
|
||||||
iBestPos = iTargetPos;
|
iBestPos = iTargetPos;
|
||||||
}
|
}
|
||||||
|
iStartMv++;
|
||||||
pRef += kiRefStride;
|
pRef += kiRefStride;
|
||||||
++iTargetPos;
|
++iTargetPos;
|
||||||
}
|
}
|
||||||
@ -493,28 +505,34 @@ void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
||||||
uint16_t* pMvdTable, const int32_t kiFixedMvd,
|
uint16_t* pMvdTable,
|
||||||
const int32_t kiEncStride, const int32_t kiRefStride,
|
const int32_t kiEncStride, const int32_t kiRefStride,
|
||||||
const int32_t kiMinPos, const int32_t kiMaxPos,
|
const int16_t kiMinMv, const int16_t kiMaxMv,
|
||||||
const bool bVerticalSearch) {
|
const bool bVerticalSearch) {
|
||||||
uint8_t* kpEncMb = pMe->pEncMb;
|
uint8_t* kpEncMb = pMe->pEncMb;
|
||||||
const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixX;
|
|
||||||
uint8_t* pRef = &pMe->pColoRefMb[kiMinPos - kiCurMeBlockPix];
|
const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
|
||||||
|
int32_t iMinPos = iCurMeBlockPixX + kiMinMv;
|
||||||
|
int32_t iMaxPos = iCurMeBlockPixX + kiMaxMv;
|
||||||
|
int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
|
||||||
|
uint16_t* pMvdCost = & (pMvdTable[ (kiMinMv << 2) - pMe->sMvp.iMvX]);
|
||||||
|
int16_t iStartMv = 0;
|
||||||
|
uint8_t* pRef = &pMe->pColoRefMb[kiMinMv];
|
||||||
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
|
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
|
||||||
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
|
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
|
||||||
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
|
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
|
||||||
ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
|
ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
|
||||||
const int32_t kiNumVector = kiMaxPos - kiMinPos;
|
const int32_t kiNumVector = iMaxPos - iMinPos;
|
||||||
int32_t iCountLoop8 = kiNumVector >> 3;
|
int32_t iCountLoop8 = kiNumVector >> 3;
|
||||||
const int32_t kiRemainingLoop8 = kiNumVector & 7;
|
const int32_t kiRemainingLoop8 = kiNumVector & 7;
|
||||||
int32_t iTargetPos = kiMinPos;
|
int32_t iTargetPos = iMinPos;
|
||||||
int16_t iBestPos = pMe->sMv.iMvX;
|
int16_t iBestPos = pMe->sMv.iMvX;
|
||||||
uint32_t uiBestCost = pMe->uiSadCost;
|
uint32_t uiBestCost = pMe->uiSadCost;
|
||||||
uint32_t uiCostMin;
|
uint32_t uiCostMin;
|
||||||
int32_t iIndexMinPos;
|
int32_t iIndexMinPos;
|
||||||
|
|
||||||
while (iCountLoop8 > 0) {
|
while (iCountLoop8 > 0) {
|
||||||
CalcMvdCostx8_c (uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
|
CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
|
||||||
uiCostMin = pSampleSadHor8 (kpEncMb, kiEncStride, pRef, kiRefStride, uiBaseCost, &iIndexMinPos);
|
uiCostMin = pSampleSadHor8 (kpEncMb, kiEncStride, pRef, kiRefStride, uiBaseCost, &iIndexMinPos);
|
||||||
if (uiCostMin < uiBestCost) {
|
if (uiCostMin < uiBestCost) {
|
||||||
uiBestCost = uiCostMin;
|
uiBestCost = uiCostMin;
|
||||||
@ -522,56 +540,80 @@ void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
|||||||
}
|
}
|
||||||
iTargetPos += 8;
|
iTargetPos += 8;
|
||||||
pRef += 8;
|
pRef += 8;
|
||||||
|
iStartMv += 8;
|
||||||
-- iCountLoop8;
|
-- iCountLoop8;
|
||||||
}
|
}
|
||||||
if (kiRemainingLoop8 > 0) {
|
if (kiRemainingLoop8 > 0) {
|
||||||
while (iTargetPos < kiMaxPos) {
|
while (iTargetPos < iMaxPos) {
|
||||||
const uint16_t pMvdCost = pMvdTable[iTargetPos << 2];
|
const uint16_t uiMvdCost = pMvdCost[iStartMv << 2];
|
||||||
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + pMvdCost);
|
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
|
||||||
if (uiSadCost < uiBestCost) {
|
if (uiSadCost < uiBestCost) {
|
||||||
uiBestCost = uiSadCost;
|
uiBestCost = uiSadCost;
|
||||||
iBestPos = iTargetPos;
|
iBestPos = iTargetPos;
|
||||||
}
|
}
|
||||||
|
iStartMv++;
|
||||||
++pRef;
|
++pRef;
|
||||||
++iTargetPos;
|
++iTargetPos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (uiBestCost < pMe->uiSadCost) {
|
if (uiBestCost < pMe->uiSadCost) {
|
||||||
SMVUnitXY sBestMv;
|
SMVUnitXY sBestMv;
|
||||||
sBestMv.iMvX = iBestPos - kiCurMeBlockPix;
|
sBestMv.iMvX = iBestPos - iCurMeBlockPixX;
|
||||||
sBestMv.iMvY = 0;
|
sBestMv.iMvY = 0;
|
||||||
UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvX], pMe);
|
UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvX], pMe);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
|
||||||
uint16_t* pMvdTable, const int32_t kiFixedMvd,
|
uint16_t* pMvdTable,
|
||||||
const int32_t kiEncStride, const int32_t kiRefStride,
|
const int32_t kiEncStride, const int32_t kiRefStride,
|
||||||
const int32_t kiMinPos, const int32_t kiMaxPos,
|
const int16_t iMinMv, const int16_t iMaxMv,
|
||||||
const bool bVerticalSearch) {
|
const bool bVerticalSearch) {
|
||||||
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
|
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
|
||||||
const int32_t kiCurMeBlockPix = bVerticalSearch ? pMe->iCurMeBlockPixY : pMe->iCurMeBlockPixX;
|
const int32_t kiCurMeBlockPixX = pMe->iCurMeBlockPixX;
|
||||||
const int32_t kiStride = bVerticalSearch ? kiRefStride : 1;
|
const int32_t kiCurMeBlockQpelPixX = ((kiCurMeBlockPixX) << 2);
|
||||||
uint8_t* pRef = &pMe->pColoRefMb[ (kiMinPos - kiCurMeBlockPix) * kiStride];
|
const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
|
||||||
uint16_t* pMvdCost = & (pMvdTable[kiMinPos << 2]);
|
const int32_t kiCurMeBlockQpelPixY = ((kiCurMeBlockPixY) << 2);
|
||||||
|
int32_t iMinPos, iMaxPos;
|
||||||
|
int32_t iFixedMvd;
|
||||||
|
int32_t iCurMeBlockPix;
|
||||||
|
int32_t iStride;
|
||||||
|
uint16_t* pMvdCost;
|
||||||
|
|
||||||
|
if (bVerticalSearch) {
|
||||||
|
iMinPos = kiCurMeBlockPixY + iMinMv;
|
||||||
|
iMaxPos = kiCurMeBlockPixY + iMaxMv;
|
||||||
|
iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
|
||||||
|
iCurMeBlockPix = pMe->iCurMeBlockPixY;
|
||||||
|
iStride = kiRefStride;
|
||||||
|
pMvdCost = & (pMvdTable[ (iMinMv << 2) - pMe->sMvp.iMvY]);
|
||||||
|
} else {
|
||||||
|
iMinPos = kiCurMeBlockPixX + iMinMv;
|
||||||
|
iMaxPos = kiCurMeBlockPixX + iMaxMv;
|
||||||
|
iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
|
||||||
|
iCurMeBlockPix = pMe->iCurMeBlockPixX;
|
||||||
|
iStride = 1;
|
||||||
|
pMvdCost = & (pMvdTable[ (iMinMv << 2) - pMe->sMvp.iMvX]);
|
||||||
|
}
|
||||||
|
uint8_t* pRef = &pMe->pColoRefMb[ iMinMv * iStride];
|
||||||
uint32_t uiBestCost = 0xFFFFFFFF;
|
uint32_t uiBestCost = 0xFFFFFFFF;
|
||||||
int32_t iBestPos = 0;
|
int32_t iBestPos = 0;
|
||||||
|
|
||||||
for (int32_t iTargetPos = kiMinPos; iTargetPos < kiMaxPos; ++ iTargetPos) {
|
for (int32_t iTargetPos = iMinPos; iTargetPos < iMaxPos; ++ iTargetPos) {
|
||||||
uint8_t* const kpEncMb = pMe->pEncMb;
|
uint8_t* const kpEncMb = pMe->pEncMb;
|
||||||
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + *pMvdCost);
|
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + *pMvdCost);
|
||||||
if (uiSadCost < uiBestCost) {
|
if (uiSadCost < uiBestCost) {
|
||||||
uiBestCost = uiSadCost;
|
uiBestCost = uiSadCost;
|
||||||
iBestPos = iTargetPos;
|
iBestPos = iTargetPos;
|
||||||
}
|
}
|
||||||
pRef += kiStride;
|
pRef += iStride;
|
||||||
pMvdCost += 4;
|
pMvdCost += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (uiBestCost < pMe->uiSadCost) {
|
if (uiBestCost < pMe->uiSadCost) {
|
||||||
SMVUnitXY sBestMv;
|
SMVUnitXY sBestMv;
|
||||||
sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - kiCurMeBlockPix);
|
sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - iCurMeBlockPix);
|
||||||
sBestMv.iMvY = bVerticalSearch ? (iBestPos - kiCurMeBlockPix) : 0;
|
sBestMv.iMvY = bVerticalSearch ? (iBestPos - iCurMeBlockPix) : 0;
|
||||||
UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride + sBestMv.iMvX], pMe);
|
UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride + sBestMv.iMvX], pMe);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -581,31 +623,25 @@ void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* p
|
|||||||
PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
|
PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
|
||||||
PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
|
PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
|
||||||
|
|
||||||
const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
|
|
||||||
const int32_t iCurMeBlockQpelPixX = ((iCurMeBlockPixX) << 2);
|
|
||||||
const int32_t iCurMeBlockPixY = pMe->iCurMeBlockPixY;
|
|
||||||
const int32_t iCurMeBlockQpelPixY = ((iCurMeBlockPixY) << 2);
|
|
||||||
uint16_t* pMvdCostX = pMe->pMvdCost - iCurMeBlockQpelPixX - pMe->sMvp.iMvX;//do the offset here instead of in the search
|
|
||||||
uint16_t* pMvdCostY = pMe->pMvdCost - iCurMeBlockQpelPixY - pMe->sMvp.iMvY;//do the offset here instead of in the search
|
|
||||||
|
|
||||||
//vertical search
|
//vertical search
|
||||||
pfVerticalFullSearchFunc (pFuncList, pMe,
|
pfVerticalFullSearchFunc (pFuncList, pMe,
|
||||||
pMvdCostY, pMvdCostX[ iCurMeBlockQpelPixX ],
|
pMe->pMvdCost,
|
||||||
kiEncStride, kiRefStride,
|
kiEncStride, kiRefStride,
|
||||||
iCurMeBlockPixY + pSlice->sMvStartMin.iMvY,
|
pSlice->sMvStartMin.iMvY,
|
||||||
iCurMeBlockPixY + pSlice->sMvStartMax.iMvY, true);
|
pSlice->sMvStartMax.iMvY, true);
|
||||||
|
|
||||||
//horizontal search
|
//horizontal search
|
||||||
if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
|
if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
|
||||||
pfHorizontalFullSearchFunc (pFuncList, pMe,
|
pfHorizontalFullSearchFunc (pFuncList, pMe,
|
||||||
pMvdCostX, pMvdCostY[ iCurMeBlockQpelPixY ],
|
pMe->pMvdCost,
|
||||||
kiEncStride, kiRefStride,
|
kiEncStride, kiRefStride,
|
||||||
iCurMeBlockPixX + pSlice->sMvStartMin.iMvX,
|
pSlice->sMvStartMin.iMvX,
|
||||||
iCurMeBlockPixX + pSlice->sMvStartMax.iMvX,
|
pSlice->sMvStartMax.iMvX,
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
// Feature Search Basics
|
// Feature Search Basics
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
|
@ -174,7 +174,6 @@ class MotionEstimateRangeTest : public ::testing::Test {
|
|||||||
delete m_pMa;
|
delete m_pMa;
|
||||||
m_pMa = NULL;
|
m_pMa = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
uint8_t* m_pRefStart;
|
uint8_t* m_pRefStart;
|
||||||
@ -206,7 +205,7 @@ TEST_F (MotionEstimateRangeTest, TestDiamondSearch) {
|
|||||||
SWelsME sMe;
|
SWelsME sMe;
|
||||||
SSlice sSlice;
|
SSlice sSlice;
|
||||||
const uint8_t kuiQp = rand() % 52;
|
const uint8_t kuiQp = rand() % 52;
|
||||||
InitMe (kuiQp, m_uiMvdInterTableStride, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
|
InitMe (kuiQp, m_uiMvdInterTableSize, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
|
||||||
|
|
||||||
WelsInitSampleSadFunc (&sFuncList, 0); //test c functions
|
WelsInitSampleSadFunc (&sFuncList, 0); //test c functions
|
||||||
|
|
||||||
@ -244,6 +243,50 @@ TEST_F (MotionEstimateRangeTest, TestDiamondSearch) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F (MotionEstimateRangeTest, TestWelsMotionCrossSearch) {
|
||||||
|
|
||||||
|
SWelsFuncPtrList sFuncList;
|
||||||
|
SWelsME sMe;
|
||||||
|
SSlice sSlice;
|
||||||
|
int32_t iUsageType = 1;
|
||||||
|
uint8_t* pRef = m_pRefStart + PADDING_LENGTH * m_iWidthExt + PADDING_LENGTH;
|
||||||
|
const int32_t kiMaxBlock16Sad = 72000;//a rough number
|
||||||
|
|
||||||
|
WelsInitSampleSadFunc (&sFuncList, 0); //test c functions
|
||||||
|
WelsInitMeFunc (&sFuncList, 0, iUsageType);
|
||||||
|
|
||||||
|
RandomPixelDataGenerator (m_pSrc, m_iWidth, m_iHeight, m_iWidth);
|
||||||
|
RandomPixelDataGenerator (m_pRefStart, m_iWidthExt, m_iHeightExt, m_iWidthExt);
|
||||||
|
|
||||||
|
sMe.uiBlockSize = BLOCK_16x16; //
|
||||||
|
for (int32_t iMby = 0; iMby < m_iMbHeight; iMby++) {
|
||||||
|
for (int32_t iMbx = 0; iMbx < m_iMbWidth; iMbx++) {
|
||||||
|
|
||||||
|
const uint8_t kuiQp = rand() % 52;
|
||||||
|
|
||||||
|
InitMe (kuiQp, m_uiMvdInterTableSize, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
|
||||||
|
SetMvWithinIntegerMvRange (m_iMbWidth, m_iMbHeight, iMbx , iMby, m_iMvRange,
|
||||||
|
& (sSlice.sMvStartMin), & (sSlice.sMvStartMax));
|
||||||
|
|
||||||
|
|
||||||
|
sMe.sMvp.iMvX = rand() % m_iMvRange;
|
||||||
|
sMe.sMvp.iMvY = rand() % m_iMvRange;
|
||||||
|
sMe.iCurMeBlockPixX = (iMbx << 4);
|
||||||
|
sMe.iCurMeBlockPixY = (iMby << 4);
|
||||||
|
sMe.pRefMb = pRef + sMe.iCurMeBlockPixX + sMe.iCurMeBlockPixY * m_iWidthExt;
|
||||||
|
sMe.pEncMb = m_pSrc + sMe.iCurMeBlockPixX + sMe.iCurMeBlockPixY * m_iWidth;;
|
||||||
|
sMe.uiSadCost = sMe.uiSatdCost = kiMaxBlock16Sad;
|
||||||
|
sMe.pColoRefMb = sMe.pRefMb;
|
||||||
|
WelsMotionCrossSearch (&sFuncList, &sMe, &sSlice, m_iWidth, m_iWidthExt);
|
||||||
|
if ((WELS_ABS (sMe.sMv.iMvX) > m_iMvRange))
|
||||||
|
printf ("mvx = %d\n", sMe.sMv.iMvX);
|
||||||
|
ASSERT_TRUE (! (WELS_ABS (sMe.sMv.iMvX) > m_iMvRange));
|
||||||
|
if ((WELS_ABS (sMe.sMv.iMvY) > m_iMvRange))
|
||||||
|
printf ("mvy = %d\n", sMe.sMv.iMvY);
|
||||||
|
ASSERT_TRUE (! (WELS_ABS (sMe.sMv.iMvY) > m_iMvRange));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
void MotionEstimateTest::DoLineTest (PLineFullSearchFunc func, bool vertical) {
|
void MotionEstimateTest::DoLineTest (PLineFullSearchFunc func, bool vertical) {
|
||||||
const int32_t kiMaxBlock16Sad = 72000;//a rough number
|
const int32_t kiMaxBlock16Sad = 72000;//a rough number
|
||||||
SWelsFuncPtrList sFuncList;
|
SWelsFuncPtrList sFuncList;
|
||||||
@ -295,18 +338,22 @@ void MotionEstimateTest::DoLineTest (PLineFullSearchFunc func, bool vertical) {
|
|||||||
uint16_t* pMvdCostY = sMe.pMvdCost - iCurMeBlockQpelPixY - sMe.sMvp.iMvY;
|
uint16_t* pMvdCostY = sMe.pMvdCost - iCurMeBlockQpelPixY - sMe.sMvp.iMvY;
|
||||||
uint16_t* pMvdCost = vertical ? pMvdCostY : pMvdCostX;
|
uint16_t* pMvdCost = vertical ? pMvdCostY : pMvdCostX;
|
||||||
int iSize = vertical ? m_iHeight : m_iWidth;
|
int iSize = vertical ? m_iHeight : m_iWidth;
|
||||||
int iFixedMvd = vertical ? pMvdCostX[ iCurMeBlockQpelPixX ] : pMvdCostY[ iCurMeBlockQpelPixY ];
|
|
||||||
func (&sFuncList, &sMe,
|
|
||||||
pMvdCost, iFixedMvd,
|
|
||||||
m_iMaxSearchBlock, m_iWidth,
|
|
||||||
INTPEL_NEEDED_MARGIN,
|
|
||||||
iSize - INTPEL_NEEDED_MARGIN - 16, vertical);
|
|
||||||
|
|
||||||
//the final selection may be affected by the MVD cost, i.e. an MvY of smaller magnitude may be chosen over the target
|
//the final selection may be affected by the MVD cost, i.e. an MvY of smaller magnitude may be chosen over the target
|
||||||
if (vertical) {
|
if (vertical) {
|
||||||
|
func (&sFuncList, &sMe,
|
||||||
|
pMvdCost,
|
||||||
|
m_iMaxSearchBlock, m_iWidth,
|
||||||
|
INTPEL_NEEDED_MARGIN - sMe.iCurMeBlockPixY,
|
||||||
|
iSize - INTPEL_NEEDED_MARGIN - 16 - sMe.iCurMeBlockPixY, vertical);
|
||||||
bFoundMatch = (sMe.sMv.iMvX == 0
|
bFoundMatch = (sMe.sMv.iMvX == 0
|
||||||
&& (sMe.sMv.iMvY == sTargetMv.iMvY || abs (sMe.sMv.iMvY) < abs (sTargetMv.iMvY)));
|
&& (sMe.sMv.iMvY == sTargetMv.iMvY || abs (sMe.sMv.iMvY) < abs (sTargetMv.iMvY)));
|
||||||
} else {
|
} else {
|
||||||
|
func (&sFuncList, &sMe,
|
||||||
|
pMvdCost,
|
||||||
|
m_iMaxSearchBlock, m_iWidth,
|
||||||
|
INTPEL_NEEDED_MARGIN - sMe.iCurMeBlockPixX,
|
||||||
|
iSize - INTPEL_NEEDED_MARGIN - 16 - sMe.iCurMeBlockPixX, vertical);
|
||||||
bFoundMatch = (sMe.sMv.iMvY == 0
|
bFoundMatch = (sMe.sMv.iMvY == 0
|
||||||
&& (sMe.sMv.iMvX == sTargetMv.iMvX || abs (sMe.sMv.iMvX) < abs (sTargetMv.iMvX)));
|
&& (sMe.sMv.iMvX == sTargetMv.iMvX || abs (sMe.sMv.iMvX) < abs (sTargetMv.iMvX)));
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user