From 0a14d4c79b88098eebfaa44dbbd78ae98accf08a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 29 Jan 2014 14:45:47 +0200 Subject: [PATCH] Make the deblocking handling endian independent This avoids having to set a define indicating the endianness. This doesn't incur any significant measurable slowdown. --- codec/common/deblocking_common.h | 6 ---- codec/decoder/core/src/deblocking.cpp | 41 +++++++++++++-------------- codec/encoder/core/src/deblocking.cpp | 41 +++++++++++++-------------- 3 files changed, 40 insertions(+), 48 deletions(-) diff --git a/codec/common/deblocking_common.h b/codec/common/deblocking_common.h index f91e070b..e49a5105 100644 --- a/codec/common/deblocking_common.h +++ b/codec/common/deblocking_common.h @@ -15,12 +15,6 @@ void_t DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, i int8_t* pTc); void_t DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); -#ifdef WORDS_BIGENDIAN -#define DEBLOCK_BS_SHIFTED(x) ((x) | ((x) << 8)) -#else -#define DEBLOCK_BS_SHIFTED(x) ((x) | ((x) >> 8)) -#endif - #if defined(__cplusplus) extern "C" { #endif//__cplusplus diff --git a/codec/decoder/core/src/deblocking.cpp b/codec/decoder/core/src/deblocking.cpp index 6266c49e..2691f796 100644 --- a/codec/decoder/core/src/deblocking.cpp +++ b/codec/decoder/core/src/deblocking.cpp @@ -145,34 +145,29 @@ static const uint8_t g_kuiTableBIdx[2][8] = { void_t inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; - ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4); uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); - * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b0) << iLShiftFactor; - nBS[0][1][0] = uiBsx3[0]; - nBS[0][2][0] = uiBsx3[1]; - nBS[0][3][0] = uiBsx3[2]; + nBS[0][1][0] = (pNnzTab[0] | pNnzTab[1]) << iLShiftFactor; + nBS[0][2][0] = (pNnzTab[1] | pNnzTab[2]) << iLShiftFactor; + nBS[0][3][0] = (pNnzTab[2] | pNnzTab[3]) << iLShiftFactor; - * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b1) << iLShiftFactor; - nBS[0][1][1] = uiBsx3[0]; - nBS[0][2][1] = uiBsx3[1]; - nBS[0][3][1] = uiBsx3[2]; + nBS[0][1][1] = (pNnzTab[4] | pNnzTab[5]) << iLShiftFactor; + nBS[0][2][1] = (pNnzTab[5] | pNnzTab[6]) << iLShiftFactor; + nBS[0][3][1] = (pNnzTab[6] | pNnzTab[7]) << iLShiftFactor; * (uint32_t*)nBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor; - * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b2) << iLShiftFactor; - nBS[0][1][2] = uiBsx3[0]; - nBS[0][2][2] = uiBsx3[1]; - nBS[0][3][2] = uiBsx3[2]; + nBS[0][1][2] = (pNnzTab[8] | pNnzTab[9]) << iLShiftFactor; + nBS[0][2][2] = (pNnzTab[9] | pNnzTab[10]) << iLShiftFactor; + nBS[0][3][2] = (pNnzTab[10] | pNnzTab[11]) << iLShiftFactor; * (uint32_t*)nBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor; - * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b3) << iLShiftFactor; - nBS[0][1][3] = uiBsx3[0]; - nBS[0][2][3] = uiBsx3[1]; - nBS[0][3][3] = uiBsx3[2]; + nBS[0][1][3] = (pNnzTab[12] | pNnzTab[13]) << iLShiftFactor; + nBS[0][2][3] = (pNnzTab[13] | pNnzTab[14]) << iLShiftFactor; + nBS[0][3][3] = (pNnzTab[14] | pNnzTab[15]) << iLShiftFactor; * (uint32_t*)nBS[1][3] = (uiNnz32b2 | uiNnz32b3) << iLShiftFactor; } @@ -188,22 +183,26 @@ void_t static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t n uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); - * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b0); + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0); nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1); nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2); - * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b1); + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4); nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5); nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6); - * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b2); + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8); nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9); nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10); - * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b3); + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12); nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13); nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14); diff --git a/codec/encoder/core/src/deblocking.cpp b/codec/encoder/core/src/deblocking.cpp index 42d5337b..7bc1b402 100644 --- a/codec/encoder/core/src/deblocking.cpp +++ b/codec/encoder/core/src/deblocking.cpp @@ -156,34 +156,29 @@ static const ALIGNED_DECLARE (int32_t, g_kiTableBlock8x8NIdx[2][4][4], 16) = { void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t uiBS[2][4][4], int32_t iLShiftFactor) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; - ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4); uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); - * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b0) << iLShiftFactor; - uiBS[0][1][0] = uiBsx3[0]; - uiBS[0][2][0] = uiBsx3[1]; - uiBS[0][3][0] = uiBsx3[2]; + uiBS[0][1][0] = (pNnzTab[0] | pNnzTab[1]) << iLShiftFactor; + uiBS[0][2][0] = (pNnzTab[1] | pNnzTab[2]) << iLShiftFactor; + uiBS[0][3][0] = (pNnzTab[2] | pNnzTab[3]) << iLShiftFactor; - * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b1) << iLShiftFactor; - uiBS[0][1][1] = uiBsx3[0]; - uiBS[0][2][1] = uiBsx3[1]; - uiBS[0][3][1] = uiBsx3[2]; + uiBS[0][1][1] = (pNnzTab[4] | pNnzTab[5]) << iLShiftFactor; + uiBS[0][2][1] = (pNnzTab[5] | pNnzTab[6]) << iLShiftFactor; + uiBS[0][3][1] = (pNnzTab[6] | pNnzTab[7]) << iLShiftFactor; * (uint32_t*)uiBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor; - * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b2) << iLShiftFactor; - uiBS[0][1][2] = uiBsx3[0]; - uiBS[0][2][2] = uiBsx3[1]; - uiBS[0][3][2] = uiBsx3[2]; + uiBS[0][1][2] = (pNnzTab[8] | pNnzTab[9]) << iLShiftFactor; + uiBS[0][2][2] = (pNnzTab[9] | pNnzTab[10]) << iLShiftFactor; + uiBS[0][3][2] = (pNnzTab[10] | pNnzTab[11]) << iLShiftFactor; * (uint32_t*)uiBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor; - * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b3) << iLShiftFactor; - uiBS[0][1][3] = uiBsx3[0]; - uiBS[0][2][3] = uiBsx3[1]; - uiBS[0][3][3] = uiBsx3[2]; + uiBS[0][1][3] = (pNnzTab[12] | pNnzTab[13]) << iLShiftFactor; + uiBS[0][2][3] = (pNnzTab[13] | pNnzTab[14]) << iLShiftFactor; + uiBS[0][3][3] = (pNnzTab[14] | pNnzTab[15]) << iLShiftFactor; * (uint32_t*)uiBS[1][3] = (uiNnz32b2 | uiNnz32b3) << iLShiftFactor; } @@ -197,22 +192,26 @@ void inline DeblockingBSInsideMBNormal (SMB* pCurMb, uint8_t uiBS[2][4][4], int8 uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); - * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b0); + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; uiBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 1, 0); uiBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 2, 1); uiBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 3, 2); - * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b1); + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; uiBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 5, 4); uiBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 6, 5); uiBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 7, 6); - * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b2); + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; uiBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 9, 8); uiBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 10, 9); uiBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 11, 10); - * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b3); + for (int i = 0; i < 3; i++) + uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; uiBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 13, 12); uiBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 14, 13); uiBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 15, 14);