Fix shifting tricks in deblocking for big endian

The code interprets an array of 4 uint8_t values as one uint32_t
and does shifts on the value. The same optimization can be
kept in big endian as well, but the shift has to be done in the
other direction.

This code could be made truly independent of endianness, but
that could cause some minimal performance degradation, at least
in theory.

This makes "make test" pass on big endian, assuming that
WORDS_BIGENDIAN is defined while building.
This commit is contained in:
Martin Storsjö
2014-01-29 12:49:38 +02:00
parent 47ec78843a
commit cadbec75d8
3 changed files with 22 additions and 16 deletions

View File

@@ -164,24 +164,24 @@ void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t uiBS[2][4][4],
uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
* (uint32_t*)uiBsx3 = (uiNnz32b0 | (uiNnz32b0 >> 8)) << iLShiftFactor;
* (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b0) << iLShiftFactor;
uiBS[0][1][0] = uiBsx3[0];
uiBS[0][2][0] = uiBsx3[1];
uiBS[0][3][0] = uiBsx3[2];
* (uint32_t*)uiBsx3 = (uiNnz32b1 | (uiNnz32b1 >> 8)) << iLShiftFactor;
* (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b1) << iLShiftFactor;
uiBS[0][1][1] = uiBsx3[0];
uiBS[0][2][1] = uiBsx3[1];
uiBS[0][3][1] = uiBsx3[2];
* (uint32_t*)uiBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor;
* (uint32_t*)uiBsx3 = (uiNnz32b2 | (uiNnz32b2 >> 8)) << iLShiftFactor;
* (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b2) << iLShiftFactor;
uiBS[0][1][2] = uiBsx3[0];
uiBS[0][2][2] = uiBsx3[1];
uiBS[0][3][2] = uiBsx3[2];
* (uint32_t*)uiBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor;
* (uint32_t*)uiBsx3 = (uiNnz32b3 | (uiNnz32b3 >> 8)) << iLShiftFactor;
* (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b3) << iLShiftFactor;
uiBS[0][1][3] = uiBsx3[0];
uiBS[0][2][3] = uiBsx3[1];
uiBS[0][3][3] = uiBsx3[2];
@@ -198,22 +198,22 @@ void inline DeblockingBSInsideMBNormal (SMB* pCurMb, uint8_t uiBS[2][4][4], int8
uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
* (uint32_t*)uiBsx4 = (uiNnz32b0 | (uiNnz32b0 >> 8));
* (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b0);
uiBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 1, 0);
uiBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 2, 1);
uiBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 3, 2);
* (uint32_t*)uiBsx4 = (uiNnz32b1 | (uiNnz32b1 >> 8));
* (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b1);
uiBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 5, 4);
uiBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 6, 5);
uiBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 7, 6);
* (uint32_t*)uiBsx4 = (uiNnz32b2 | (uiNnz32b2 >> 8));
* (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b2);
uiBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 9, 8);
uiBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 10, 9);
uiBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 11, 10);
* (uint32_t*)uiBsx4 = (uiNnz32b3 | (uiNnz32b3 >> 8));
* (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b3);
uiBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 13, 12);
uiBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 14, 13);
uiBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 15, 14);