Merge pull request #1299 from ruil2/mvcost_check_1

fix crash on mvd cost calculation
This commit is contained in:
huili2 2014-08-25 16:38:16 +08:00
commit a4eea4c64d
4 changed files with 143 additions and 60 deletions

View File

@ -200,9 +200,9 @@ bool CheckDirectionalMvFalse (PSampleSadSatdCostFunc pSad, void* vpMe,
// Cross Search Basics
// Declaration of the plain C line search; bVerticalSearch selects which axis
// is scanned.
// NOTE(review): this listing shows two variants of the parameter list back to
// back (pMvdTable with and without kiFixedMvd, int32_t kiMinPos/kiMaxPos vs
// int16_t kiMinMv/kiMaxMv). Presumably the second line of each pair is the
// current signature -- confirm against the real header.
void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch);
#ifdef X86_ASM
extern "C"
@ -212,14 +212,14 @@ uint32_t SampleSad16x16Hor8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, uint16_
}
// Declaration of the SSE4.1 vertical line search (only visible when X86_ASM
// is defined). Takes the same parameter list as LineFullSearch_c.
// NOTE(review): two variants of the parameter list appear back to back here
// (with/without kiFixedMvd, kiMinPos/kiMaxPos vs kiMinMv/kiMaxMv); presumably
// the second line of each pair is current -- confirm against the real header.
void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch);
// Declaration of the SSE4.1 horizontal line search (only visible when X86_ASM
// is defined). Takes the same parameter list as LineFullSearch_c.
// NOTE(review): two variants of the parameter list appear back to back here
// (with/without kiFixedMvd, kiMinPos/kiMaxPos vs kiMinMv/kiMaxMv); presumably
// the second line of each pair is current -- confirm against the real header.
void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch);
#endif
void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,

View File

@ -155,9 +155,9 @@ typedef bool (*PCheckDirectionalMv) (PSampleSadSatdCostFunc pSad, void* vpMe,
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
int32_t& iBestSadCost);
// Function-pointer type for a line full search: function list, ME context,
// MVD cost table, encode/reference strides, search bounds, and a flag that
// selects the vertical axis.
// NOTE(review): two variants of the parameter list appear back to back here
// (with/without kiFixedMvd, kiMinPos/kiMaxPos vs kiMinMv/kiMaxMv); presumably
// the second line of each pair is current -- confirm against the real header.
typedef void (*PLineFullSearchFunc) (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch);
typedef void (*PInitializeHashforFeatureFunc) (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);

View File

@ -422,13 +422,23 @@ void CalcMvdCostx8_c (uint16_t* pMvdCost, const int32_t kiStartMv, uint16_t* pMv
}
}
void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch) {
uint8_t* kpEncMb = pMe->pEncMb;
const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
uint8_t* pRef = &pMe->pColoRefMb[ (kiMinPos - kiCurMeBlockPix) * kiRefStride];
uint8_t* pRef = &pMe->pColoRefMb[kiMinMv * kiRefStride];
const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
int32_t iMinPos = kiCurMeBlockPixY + kiMinMv;
int32_t iMaxPos = kiCurMeBlockPixY + kiMaxMv;
int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
uint16_t* pMvdCost = & (pMvdTable[ (kiMinMv << 2) - pMe->sMvp.iMvY]);
int16_t iStartMv = 0;
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
@ -438,7 +448,7 @@ void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
PTransposeMatrixBlocksFunc TransposeMatrixBlocks = kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 :
TransposeMatrixBlocksx8_mmx;
const int32_t kiDiff = kiMaxPos - kiMinPos;
const int32_t kiDiff = iMaxPos - iMinPos;
const int32_t kiRowNum = WELS_ALIGN ((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
const int32_t kiBlocksNum = kIsBlock16x16 ? (kiRowNum >> 4) : (kiRowNum >> 3);
int32_t iCountLoop8 = (kiRowNum - kiEdgeBlocks) >> 3;
@ -451,7 +461,7 @@ void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
TransposeMatrixBlock (&uiMatrixEnc[0][0], 16, kpEncMb, kiEncStride);
TransposeMatrixBlocks (&uiMatrixRef[0][0], kiMatrixStride, pRef, kiRefStride, kiBlocksNum);
ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
int32_t iTargetPos = kiMinPos;
int32_t iTargetPos = iMinPos;
int16_t iBestPos = pMe->sMv.iMvX;
uint32_t uiBestCost = pMe->uiSadCost;
uint32_t uiCostMin;
@ -460,7 +470,7 @@ void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
pRef = &uiMatrixRef[0][0];
while (iCountLoop8 > 0) {
CalcMvdCostx8_c (uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
uiCostMin = pSampleSadHor8 (kpEncMb, 16, pRef, kiMatrixStride, uiBaseCost, &iIndexMinPos);
if (uiCostMin < uiBestCost) {
uiBestCost = uiCostMin;
@ -468,18 +478,20 @@ void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
}
iTargetPos += 8;
pRef += 8;
iStartMv += 8;
-- iCountLoop8;
}
if (kiRemainingVectors > 0) {
kpEncMb = pMe->pEncMb;
pRef = &pMe->pColoRefMb[ (iTargetPos - kiCurMeBlockPix) * kiRefStride];
while (iTargetPos < kiMaxPos) {
const uint16_t pMvdCost = pMvdTable[iTargetPos << 2];
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + pMvdCost);
while (iTargetPos < iMaxPos) {
const uint16_t uiMvdCost = pMvdCost[iStartMv << 2];
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
if (uiSadCost < uiBestCost) {
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
}
iStartMv++;
pRef += kiRefStride;
++iTargetPos;
}
@ -493,28 +505,34 @@ void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
}
void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch) {
uint8_t* kpEncMb = pMe->pEncMb;
const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixX;
uint8_t* pRef = &pMe->pColoRefMb[kiMinPos - kiCurMeBlockPix];
const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
int32_t iMinPos = iCurMeBlockPixX + kiMinMv;
int32_t iMaxPos = iCurMeBlockPixX + kiMaxMv;
int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
uint16_t* pMvdCost = & (pMvdTable[ (kiMinMv << 2) - pMe->sMvp.iMvX]);
int16_t iStartMv = 0;
uint8_t* pRef = &pMe->pColoRefMb[kiMinMv];
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
const int32_t kiNumVector = kiMaxPos - kiMinPos;
const int32_t kiNumVector = iMaxPos - iMinPos;
int32_t iCountLoop8 = kiNumVector >> 3;
const int32_t kiRemainingLoop8 = kiNumVector & 7;
int32_t iTargetPos = kiMinPos;
int32_t iTargetPos = iMinPos;
int16_t iBestPos = pMe->sMv.iMvX;
uint32_t uiBestCost = pMe->uiSadCost;
uint32_t uiCostMin;
int32_t iIndexMinPos;
while (iCountLoop8 > 0) {
CalcMvdCostx8_c (uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
uiCostMin = pSampleSadHor8 (kpEncMb, kiEncStride, pRef, kiRefStride, uiBaseCost, &iIndexMinPos);
if (uiCostMin < uiBestCost) {
uiBestCost = uiCostMin;
@ -522,56 +540,80 @@ void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
}
iTargetPos += 8;
pRef += 8;
iStartMv += 8;
-- iCountLoop8;
}
if (kiRemainingLoop8 > 0) {
while (iTargetPos < kiMaxPos) {
const uint16_t pMvdCost = pMvdTable[iTargetPos << 2];
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + pMvdCost);
while (iTargetPos < iMaxPos) {
const uint16_t uiMvdCost = pMvdCost[iStartMv << 2];
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
if (uiSadCost < uiBestCost) {
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
}
iStartMv++;
++pRef;
++iTargetPos;
}
}
if (uiBestCost < pMe->uiSadCost) {
SMVUnitXY sBestMv;
sBestMv.iMvX = iBestPos - kiCurMeBlockPix;
sBestMv.iMvX = iBestPos - iCurMeBlockPixX;
sBestMv.iMvY = 0;
UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvX], pMe);
}
}
#endif
// Scans one line of integer candidates around pMe->pColoRefMb -- a column when
// bVerticalSearch is true, a row otherwise -- scoring each candidate as SAD
// plus motion-vector-difference cost, and hands the best candidate to
// UpdateMeResults when its cost is lower than pMe->uiSadCost.
// NOTE(review): this listing carries two variants of several statements back
// to back (kiMinPos/kiMaxPos/kiFixedMvd/kiStride/kiCurMeBlockPix vs
// iMinMv/iMaxMv/iFixedMvd/iStride/iCurMeBlockPix). Presumably the second of
// each pair is the current code -- confirm against the real source before
// relying on any single line.
void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const int16_t iMinMv, const int16_t iMaxMv,
const bool bVerticalSearch) {
// SAD routine is chosen by the block size stored in the ME context.
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
const int32_t kiCurMeBlockPix = bVerticalSearch ? pMe->iCurMeBlockPixY : pMe->iCurMeBlockPixX;
const int32_t kiStride = bVerticalSearch ? kiRefStride : 1;
uint8_t* pRef = &pMe->pColoRefMb[ (kiMinPos - kiCurMeBlockPix) * kiStride];
uint16_t* pMvdCost = & (pMvdTable[kiMinPos << 2]);
// NOTE(review): kiCurMeBlockQpelPixX and kiCurMeBlockQpelPixY are computed
// here but not referenced again in the lines shown.
const int32_t kiCurMeBlockPixX = pMe->iCurMeBlockPixX;
const int32_t kiCurMeBlockQpelPixX = ((kiCurMeBlockPixX) << 2);
const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
const int32_t kiCurMeBlockQpelPixY = ((kiCurMeBlockPixY) << 2);
int32_t iMinPos, iMaxPos;
int32_t iFixedMvd;
int32_t iCurMeBlockPix;
int32_t iStride;
uint16_t* pMvdCost;
if (bVerticalSearch) {
// Vertical scan: the X displacement stays 0, so its cost is the single table
// entry at offset -sMvp.iMvX; the per-candidate entries start at
// (iMinMv << 2) - sMvp.iMvY. Presumably pMvdTable points into the middle of
// the table so negative offsets are valid -- confirm against the caller.
iMinPos = kiCurMeBlockPixY + iMinMv;
iMaxPos = kiCurMeBlockPixY + iMaxMv;
iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
iCurMeBlockPix = pMe->iCurMeBlockPixY;
iStride = kiRefStride;
pMvdCost = & (pMvdTable[ (iMinMv << 2) - pMe->sMvp.iMvY]);
} else {
// Horizontal scan: mirror of the branch above with X and Y swapped and a
// reference step of one sample.
iMinPos = kiCurMeBlockPixX + iMinMv;
iMaxPos = kiCurMeBlockPixX + iMaxMv;
iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
iCurMeBlockPix = pMe->iCurMeBlockPixX;
iStride = 1;
pMvdCost = & (pMvdTable[ (iMinMv << 2) - pMe->sMvp.iMvX]);
}
// Reference pointer for the first candidate: iMinMv steps from pColoRefMb
// along the searched axis.
uint8_t* pRef = &pMe->pColoRefMb[ iMinMv * iStride];
// Start from the largest 32-bit value so any smaller cost replaces it.
uint32_t uiBestCost = 0xFFFFFFFF;
int32_t iBestPos = 0;
// Half-open range: the upper bound itself is not evaluated.
for (int32_t iTargetPos = kiMinPos; iTargetPos < kiMaxPos; ++ iTargetPos) {
for (int32_t iTargetPos = iMinPos; iTargetPos < iMaxPos; ++ iTargetPos) {
uint8_t* const kpEncMb = pMe->pEncMb;
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + *pMvdCost);
uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + *pMvdCost);
if (uiSadCost < uiBestCost) {
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
}
pRef += kiStride;
pRef += iStride;
// Four table entries per candidate step, matching the << 2 scaling used when
// pMvdCost was set up.
pMvdCost += 4;
}
// Only report a result that improves on the cost already stored in pMe.
if (uiBestCost < pMe->uiSadCost) {
SMVUnitXY sBestMv;
sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - kiCurMeBlockPix);
sBestMv.iMvY = bVerticalSearch ? (iBestPos - kiCurMeBlockPix) : 0;
// Convert the best absolute position back to a displacement from the current
// block; the axis that was not searched stays 0.
sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - iCurMeBlockPix);
sBestMv.iMvY = bVerticalSearch ? (iBestPos - iCurMeBlockPix) : 0;
UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride + sBestMv.iMvX], pMe);
}
}
@ -581,31 +623,25 @@ void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* p
PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
const int32_t iCurMeBlockQpelPixX = ((iCurMeBlockPixX) << 2);
const int32_t iCurMeBlockPixY = pMe->iCurMeBlockPixY;
const int32_t iCurMeBlockQpelPixY = ((iCurMeBlockPixY) << 2);
uint16_t* pMvdCostX = pMe->pMvdCost - iCurMeBlockQpelPixX - pMe->sMvp.iMvX;//do the offset here instead of in the search
uint16_t* pMvdCostY = pMe->pMvdCost - iCurMeBlockQpelPixY - pMe->sMvp.iMvY;//do the offset here instead of in the search
//vertical search
pfVerticalFullSearchFunc (pFuncList, pMe,
pMvdCostY, pMvdCostX[ iCurMeBlockQpelPixX ],
pMe->pMvdCost,
kiEncStride, kiRefStride,
iCurMeBlockPixY + pSlice->sMvStartMin.iMvY,
iCurMeBlockPixY + pSlice->sMvStartMax.iMvY, true);
pSlice->sMvStartMin.iMvY,
pSlice->sMvStartMax.iMvY, true);
//horizontal search
if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
pfHorizontalFullSearchFunc (pFuncList, pMe,
pMvdCostX, pMvdCostY[ iCurMeBlockQpelPixY ],
pMe->pMvdCost,
kiEncStride, kiRefStride,
iCurMeBlockPixX + pSlice->sMvStartMin.iMvX,
iCurMeBlockPixX + pSlice->sMvStartMax.iMvX,
pSlice->sMvStartMin.iMvX,
pSlice->sMvStartMax.iMvX,
false);
}
}
/////////////////////////
// Feature Search Basics
/////////////////////////

View File

@ -174,7 +174,6 @@ class MotionEstimateRangeTest : public ::testing::Test {
delete m_pMa;
m_pMa = NULL;
}
}
public:
uint8_t* m_pRefStart;
@ -206,7 +205,7 @@ TEST_F (MotionEstimateRangeTest, TestDiamondSearch) {
SWelsME sMe;
SSlice sSlice;
const uint8_t kuiQp = rand() % 52;
InitMe (kuiQp, m_uiMvdInterTableStride, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
InitMe (kuiQp, m_uiMvdInterTableSize, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
WelsInitSampleSadFunc (&sFuncList, 0); //test c functions
@ -244,6 +243,50 @@ TEST_F (MotionEstimateRangeTest, TestDiamondSearch) {
}
// Exercises WelsMotionCrossSearch with the C function tables on every
// macroblock of randomly generated source and reference pictures, using a
// random QP and a random motion-vector predictor per macroblock, and requires
// the resulting motion vector to stay within +/- m_iMvRange on both axes.
TEST_F (MotionEstimateRangeTest, TestWelsMotionCrossSearch) {
  SWelsFuncPtrList sFuncList;
  SWelsME sMe;
  SSlice sSlice;
  int32_t iUsageType = 1;
  // Skip PADDING_LENGTH rows of m_iWidthExt plus PADDING_LENGTH samples from
  // the start of the reference buffer.
  uint8_t* pRef = m_pRefStart + PADDING_LENGTH * m_iWidthExt + PADDING_LENGTH;
  const int32_t kiMaxBlock16Sad = 72000;//a rough number

  WelsInitSampleSadFunc (&sFuncList, 0); //test c functions
  WelsInitMeFunc (&sFuncList, 0, iUsageType);

  RandomPixelDataGenerator (m_pSrc, m_iWidth, m_iHeight, m_iWidth);
  RandomPixelDataGenerator (m_pRefStart, m_iWidthExt, m_iHeightExt, m_iWidthExt);

  sMe.uiBlockSize = BLOCK_16x16;
  for (int32_t iMby = 0; iMby < m_iMbHeight; iMby++) {
    for (int32_t iMbx = 0; iMbx < m_iMbWidth; iMbx++) {
      const uint8_t kuiQp = rand() % 52;

      InitMe (kuiQp, m_uiMvdInterTableSize, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
      SetMvWithinIntegerMvRange (m_iMbWidth, m_iMbHeight, iMbx , iMby, m_iMvRange,
                                 & (sSlice.sMvStartMin), & (sSlice.sMvStartMax));

      // Random predictor in [0, m_iMvRange) on each axis.
      sMe.sMvp.iMvX = rand() % m_iMvRange;
      sMe.sMvp.iMvY = rand() % m_iMvRange;
      sMe.iCurMeBlockPixX = (iMbx << 4);
      sMe.iCurMeBlockPixY = (iMby << 4);
      sMe.pRefMb = pRef + sMe.iCurMeBlockPixX + sMe.iCurMeBlockPixY * m_iWidthExt;
      sMe.pEncMb = m_pSrc + sMe.iCurMeBlockPixX + sMe.iCurMeBlockPixY * m_iWidth;
      sMe.uiSadCost = sMe.uiSatdCost = kiMaxBlock16Sad;
      sMe.pColoRefMb = sMe.pRefMb;

      // NOTE(review): sMe.sMv is not assigned in this test before the search;
      // presumably InitMe resets it -- confirm, otherwise the checks below
      // could read a stale value when the search finds no better cost.
      WelsMotionCrossSearch (&sFuncList, &sMe, &sSlice, m_iWidth, m_iWidthExt);

      // The offending value is streamed into the assertion so it shows up in
      // the gtest failure report instead of a separate printf on stdout.
      ASSERT_TRUE (WELS_ABS (sMe.sMv.iMvX) <= m_iMvRange) << "mvx = " << sMe.sMv.iMvX;
      ASSERT_TRUE (WELS_ABS (sMe.sMv.iMvY) <= m_iMvRange) << "mvy = " << sMe.sMv.iMvY;
    }
  }
}
void MotionEstimateTest::DoLineTest (PLineFullSearchFunc func, bool vertical) {
const int32_t kiMaxBlock16Sad = 72000;//a rough number
SWelsFuncPtrList sFuncList;
@ -295,18 +338,22 @@ void MotionEstimateTest::DoLineTest (PLineFullSearchFunc func, bool vertical) {
uint16_t* pMvdCostY = sMe.pMvdCost - iCurMeBlockQpelPixY - sMe.sMvp.iMvY;
uint16_t* pMvdCost = vertical ? pMvdCostY : pMvdCostX;
int iSize = vertical ? m_iHeight : m_iWidth;
int iFixedMvd = vertical ? pMvdCostX[ iCurMeBlockQpelPixX ] : pMvdCostY[ iCurMeBlockQpelPixY ];
func (&sFuncList, &sMe,
pMvdCost, iFixedMvd,
m_iMaxSearchBlock, m_iWidth,
INTPEL_NEEDED_MARGIN,
iSize - INTPEL_NEEDED_MARGIN - 16, vertical);
//the last selection may be affected by MVDcost, that is when smaller MvY will be better
if (vertical) {
func (&sFuncList, &sMe,
pMvdCost,
m_iMaxSearchBlock, m_iWidth,
INTPEL_NEEDED_MARGIN - sMe.iCurMeBlockPixY,
iSize - INTPEL_NEEDED_MARGIN - 16 - sMe.iCurMeBlockPixY, vertical);
bFoundMatch = (sMe.sMv.iMvX == 0
&& (sMe.sMv.iMvY == sTargetMv.iMvY || abs (sMe.sMv.iMvY) < abs (sTargetMv.iMvY)));
} else {
func (&sFuncList, &sMe,
pMvdCost,
m_iMaxSearchBlock, m_iWidth,
INTPEL_NEEDED_MARGIN - sMe.iCurMeBlockPixX,
iSize - INTPEL_NEEDED_MARGIN - 16 - sMe.iCurMeBlockPixX, vertical);
bFoundMatch = (sMe.sMv.iMvY == 0
&& (sMe.sMv.iMvX == sTargetMv.iMvX || abs (sMe.sMv.iMvX) < abs (sTargetMv.iMvX)));
}