diff --git a/codec/build/win32/enc/WelsEncCore.vcproj b/codec/build/win32/enc/WelsEncCore.vcproj index 403dcef2..81d821f4 100644 --- a/codec/build/win32/enc/WelsEncCore.vcproj +++ b/codec/build/win32/enc/WelsEncCore.vcproj @@ -1432,10 +1432,6 @@ Name="Header Files" Filter="h;hpp;hxx;hm;inl" > - - diff --git a/codec/common/deblocking_common.cpp b/codec/common/deblocking_common.cpp index 9a773671..aab3500a 100644 --- a/codec/common/deblocking_common.cpp +++ b/codec/common/deblocking_common.cpp @@ -184,7 +184,7 @@ void_t DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, i #ifdef X86_ASM extern "C" { void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) { - FORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]); DeblockLumaLt4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc); @@ -192,7 +192,7 @@ extern "C" { } void DeblockLumaEq4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) { - FORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]); DeblockLumaEq4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta); diff --git a/codec/common/macros.h b/codec/common/macros.h index 9ab97341..6666f679 100644 --- a/codec/common/macros.h +++ b/codec/common/macros.h @@ -45,16 +45,16 @@ #include "typedefs.h" /* -* FORCE_STACK_ALIGN_1D: force 1 dimension local data aligned in stack +* ENFORCE_STACK_ALIGN_1D: force 1 dimension local data aligned in stack * _tp: type * _nm: var name * _sz: size * _al: align bytes * auxiliary var: _nm ## _tEmP */ -#define FORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \ +#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \ _tp _nm ## _tEmP[(_sz)+(_al)-1]; \ - _tp *_nm = _nm ## _tEmP + ((_al)-1) - (((uintptr_t)(_nm ## _tEmP + ((_al)-1)) & ((_al)-1))/sizeof(_tp)) + _tp *_nm = _nm ## _tEmP + ((_al)-1) - (((uintptr_t)(_nm ## _tEmP + ((_al)-1)) & ((_al)-1))/sizeof(_tp)); #define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \ diff --git a/codec/decoder/core/src/deblocking.cpp b/codec/decoder/core/src/deblocking.cpp index 65876f19..fd4b2ad1 100644 --- a/codec/decoder/core/src/deblocking.cpp +++ b/codec/decoder/core/src/deblocking.cpp @@ -145,7 +145,7 @@ static const uint8_t g_kuiTableBIdx[2][8] = { void_t inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; - FORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4); + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4); uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); @@ -181,7 +181,7 @@ void_t static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t n int32_t iMbXy) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy]; - FORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); @@ -269,7 +269,7 @@ void_t FilteringEdgeLumaH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iS int32_t iIndexA; int32_t iAlpha; int32_t iBeta; - FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); + ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, iBeta); @@ -286,7 +286,7 @@ void_t FilteringEdgeLumaV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iS int32_t iIndexA; int32_t iAlpha; int32_t iBeta; - FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); + ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, iBeta); @@ -331,7 +331,7 @@ void_t FilteringEdgeChromaH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_ int32_t iIndexA; int32_t iAlpha; int32_t iBeta; - FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); + ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, iBeta); @@ -347,7 +347,7 @@ void_t FilteringEdgeChromaV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_ int32_t iIndexA; int32_t iAlpha; int32_t iBeta; - FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); + ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, iBeta); @@ -483,8 +483,8 @@ void_t FilteringEdgeLumaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, in int32_t iCurQp; int32_t iIndexA, iAlpha, iBeta; - FORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); - FORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4); + ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4); pDestY = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4); iCurQp = pCurDqLayer->pLumaQp[iMbXyIndex]; @@ -531,8 +531,8 @@ void_t FilteringEdgeChromaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iCurQp; int32_t iIndexA, iAlpha, iBeta; - FORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); - FORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4); + ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4); pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSize + iMbX) << 3); pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSize + iMbX) << 3); diff --git a/codec/decoder/core/src/decode_slice.cpp b/codec/decoder/core/src/decode_slice.cpp index 7ab71cfc..7ddfd99d 100644 --- a/codec/decoder/core/src/decode_slice.cpp +++ b/codec/decoder/core/src/decode_slice.cpp @@ -492,7 +492,7 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { int32_t iNMbMode, i; uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0; - FORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; @@ -553,7 +553,7 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { memset (pCurLayer->pNzc[iMbXy], 16, sizeof (pCurLayer->pNzc[iMbXy])); //JVT-x201wcm1.doc, page229, 2009.10.23 return 0; } else if (0 == uiMbType) { //reference to JM - FORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); + ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4; pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); if (pCtx->pParseIntra4x4ModeFunc (&sNeighAvail, pIntraPredMode, pBs, pCurLayer)) { @@ -770,7 +770,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { int32_t iNMbMode, i; uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0; - FORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23 uiMbType = BsGetUe (pBs); @@ -859,7 +859,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { return 0; } else { if (0 == uiMbType) { - FORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); + ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4; pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); if (pCtx->pParseIntra4x4ModeFunc (&sNeighAvail, pIntraPredMode, pBs, pCurLayer)) { diff --git a/codec/decoder/core/src/mc.cpp b/codec/decoder/core/src/mc.cpp index 3528b62d..4f6c9419 100644 --- a/codec/decoder/core/src/mc.cpp +++ b/codec/decoder/core/src/mc.cpp @@ -420,7 +420,7 @@ static inline void_t McHorVer22_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t static inline void_t McHorVer01_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); if (iWidth == 16) { McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); @@ -434,7 +434,7 @@ static inline void_t McHorVer01_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t } static inline void_t McHorVer03_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); if (iWidth == 16) { McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight); @@ -448,7 +448,7 @@ static inline void_t McHorVer03_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t } static inline void_t McHorVer10_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); if (iWidth == 16) { McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); @@ -462,8 +462,8 @@ static inline void_t McHorVer10_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t } static inline void_t McHorVer11_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); - FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); if (iWidth == 16) { McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight); @@ -480,8 +480,8 @@ static inline void_t McHorVer11_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t } static inline void_t McHorVer12_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); - FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); if (iWidth == 16) { McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight); McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); @@ -498,8 +498,8 @@ static inline void_t McHorVer12_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t } static inline void_t McHorVer13_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); - FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); if (iWidth == 16) { McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight); @@ -516,8 +516,8 @@ static inline void_t McHorVer13_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t } static inline void_t McHorVer21_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); - FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); if (iWidth == 16) { McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); @@ -534,8 +534,8 @@ static inline void_t McHorVer21_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t } static inline void_t McHorVer23_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); - FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); if (iWidth == 16) { McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); @@ -552,7 +552,7 @@ static inline void_t McHorVer23_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t } static inline void_t McHorVer30_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); if (iWidth == 16) { McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight); @@ -566,8 +566,8 @@ static inline void_t McHorVer30_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t } static inline void_t McHorVer31_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); - FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); if (iWidth == 16) { McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight); McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); @@ -584,8 +584,8 @@ static inline void_t McHorVer31_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t } static inline void_t McHorVer32_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); - FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16); if (iWidth == 16) { McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight); @@ -602,8 +602,8 @@ static inline void_t McHorVer32_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t } static inline void_t McHorVer33_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { - FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); - FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); if (iWidth == 16) { McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); @@ -666,4 +666,4 @@ void_t InitMcFunc (SMcFunc* pMcFunc, int32_t iCpu) { #endif //(X86_ASM) } -} // namespace WelsDec \ No newline at end of file +} // namespace WelsDec diff --git a/codec/decoder/core/src/rec_mb.cpp b/codec/decoder/core/src/rec_mb.cpp index 1fea619f..0f526c69 100644 --- a/codec/decoder/core/src/rec_mb.cpp +++ b/codec/decoder/core/src/rec_mb.cpp @@ -237,7 +237,7 @@ static inline void_t BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t uint8_t* pDstV = pMCRefMem->pDstV; bool_t bExpand = false; - FORCE_STACK_ALIGN_1D (uint8_t, uiExpandBuf, (PADDING_LENGTH + 6) * (PADDING_LENGTH + 6), 16); + ENFORCE_STACK_ALIGN_1D (uint8_t, uiExpandBuf, (PADDING_LENGTH + 6) * (PADDING_LENGTH + 6), 16); if (iFullMVx & 0x07) { iExpandWidth -= 3; diff --git a/codec/encoder/core/inc/array_stack_align.h b/codec/encoder/core/inc/array_stack_align.h deleted file mode 100644 index f4fad454..00000000 --- a/codec/encoder/core/inc/array_stack_align.h +++ /dev/null @@ -1,121 +0,0 @@ -/*! - * \copy - * Copyright (c) 2011-2013, Cisco Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * - * \file array_stack_align.h - * - * \brief promised alignment of array pData declaration on stack - * multidimensional array can be extended if applicable need - * - * \date 8/8/2011 Created - * 8/12/2011 functionality implementation for multidimensional array - * 8/26/2011 better solution with reducing extra memory used, - * stack size is adaptively reduced by _tp & _al - * - ************************************************************************************* - */ -#ifndef ARRAY_STACK_ALIGN_H__ -#define ARRAY_STACK_ALIGN_H__ - -#include -#include "typedefs.h" - -/* - * ENFORCE_STACK_ALIGN_1D: force 1 dimension local pData aligned in stack - * _tp: type - * _nm: var name - * _sz: size - * _al: align bytes - * auxiliary var: _nm ## _tEmP - * NOTE: _al should be power-of-2 and >= sizeof(_tp), before considering to use such macro - */ - -//#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \ -//_tp _nm ## _tEmP[(_sz)+(_al)-1]; \ -//_tp *_nm = _nm ## _tEmP + ((_al)-1); \ -//_nm -= (((int32_t)_nm & ((_al)-1))/sizeof(_tp)); - -/* Another better solution with reducing extra memory used */ -#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \ -assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); /*_al should be power-of-2 and >= sizeof(_tp)*/\ -_tp _nm ## _tEmP[(_sz)+(_al)/sizeof(_tp)-1]; \ -_tp *_nm = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \ -_nm -= (((uintptr_t)_nm & ((_al)-1))/sizeof(_tp)); - -/* - * ENFORCE_STACK_ALIGN_2D: force 2 dimension local pData aligned in stack - * _tp: type - * _nm: var name - * _cx, _cy: size in x, y dimension - * _al: align bytes - * auxiliary var: _nm ## _tEmP, _nm ## _tEmP_al - * NOTE: _al should be power-of-2 and >= sizeof(_tp), before considering to use such macro - */ - -//#define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \ -//_tp _nm ## _tEmP[(_cx)*(_cy)+(_al)-1]; \ -//_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)-1); \ -//_nm ## _tEmP_al -= (((int32_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \ -//_tp (*_nm)[(_cy)] = (_tp (*)[(_cy)])_nm ## _tEmP_al; - -/* Another better solution with reducing extra memory used */ -#define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \ -assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); /*_al should be power-of-2 and >= sizeof(_tp)*/\ -_tp _nm ## _tEmP[(_cx)*(_cy)+(_al)/sizeof(_tp)-1]; \ -_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \ -_nm ## _tEmP_al -= (((uintptr_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \ -_tp (*_nm)[(_cy)] = (_tp (*)[(_cy)])_nm ## _tEmP_al; - -/* - * ENFORCE_STACK_ALIGN_3D: force 3 dimension local pData aligned in stack - * _tp: type - * _nm: var name - * _cx, _cy, _cz: size in x, y, z dimension - * _al: align bytes - * auxiliary var: _nm ## _tEmP, _nm ## _tEmP_al - * NOTE: _al should be power-of-2 and >= sizeof(_tp), before considering to use such macro - */ - -//#define ENFORCE_STACK_ALIGN_3D(_tp, _nm, _cx, _cy, _cz, _al) \ -//_tp _nm ## _tEmP[(_cx)*(_cy)*(_cz)+(_al)-1]; \ -//_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)-1); \ -//_nm ## _tEmP_al -= (((int32_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \ -//_tp (*_nm)[(_cy)][(_cz)] = (_tp (*)[(_cy)][(_cz)])_nm ## _tEmP_al; - -/* Another better solution with reducing extra memory used */ -#define ENFORCE_STACK_ALIGN_3D(_tp, _nm, _cx, _cy, _cz, _al) \ -assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); /*_al should be power-of-2 and >= sizeof(_tp)*/\ -_tp _nm ## _tEmP[(_cx)*(_cy)*(_cz)+(_al)/sizeof(_tp)-1]; \ -_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \ -_nm ## _tEmP_al -= (((int32_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \ -_tp (*_nm)[(_cy)][(_cz)] = (_tp (*)[(_cy)][(_cz)])_nm ## _tEmP_al; - -#endif//ARRAY_STACK_ALIGN_H__ - diff --git a/codec/encoder/core/src/deblocking.cpp b/codec/encoder/core/src/deblocking.cpp index 37e56d7c..c127a6d3 100644 --- a/codec/encoder/core/src/deblocking.cpp +++ b/codec/encoder/core/src/deblocking.cpp @@ -40,7 +40,6 @@ #include "deblocking.h" #include "cpu_core.h" -#include "array_stack_align.h" namespace WelsSVCEnc { diff --git a/codec/encoder/core/src/encoder_ext.cpp b/codec/encoder/core/src/encoder_ext.cpp index a3317070..8e48df8e 100644 --- a/codec/encoder/core/src/encoder_ext.cpp +++ b/codec/encoder/core/src/encoder_ext.cpp @@ -51,7 +51,6 @@ #include "ref_list_mgr_svc.h" #include "ls_defines.h" #include "crt_util_safe_x.h" // Safe CRT routines like utils for cross platforms -#include "array_stack_align.h" #if defined(MT_ENABLED) #include "slice_multi_threading.h" #endif//MT_ENABLED diff --git a/codec/encoder/core/src/get_intra_predictor.cpp b/codec/encoder/core/src/get_intra_predictor.cpp index a54b33b5..32d2fc44 100644 --- a/codec/encoder/core/src/get_intra_predictor.cpp +++ b/codec/encoder/core/src/get_intra_predictor.cpp @@ -42,7 +42,6 @@ #include "ls_defines.h" #include "cpu_core.h" #include "get_intra_predictor.h" -#include "array_stack_align.h" namespace WelsSVCEnc { #define I4x4_COUNT 4 diff --git a/codec/encoder/core/src/mc.cpp b/codec/encoder/core/src/mc.cpp index 1fa8a886..51dc37b7 100644 --- a/codec/encoder/core/src/mc.cpp +++ b/codec/encoder/core/src/mc.cpp @@ -40,7 +40,6 @@ #include "mc.h" #include "cpu_core.h" -#include "array_stack_align.h" namespace WelsSVCEnc { /*------------------weight for chroma fraction pixel interpolation------------------*/ diff --git a/codec/encoder/core/src/md.cpp b/codec/encoder/core/src/md.cpp index 8ed0c2d8..d7f335f8 100644 --- a/codec/encoder/core/src/md.cpp +++ b/codec/encoder/core/src/md.cpp @@ -42,7 +42,6 @@ #include "md.h" #include "cpu_core.h" #include "svc_enc_golomb.h" -#include "array_stack_align.h" namespace WelsSVCEnc { #define INTRA_VARIANCE_SAD_THRESHOLD 150 diff --git a/codec/encoder/core/src/set_mb_syn_cavlc.cpp b/codec/encoder/core/src/set_mb_syn_cavlc.cpp index 8ba34d63..5d95d626 100644 --- a/codec/encoder/core/src/set_mb_syn_cavlc.cpp +++ b/codec/encoder/core/src/set_mb_syn_cavlc.cpp @@ -41,7 +41,6 @@ #include "set_mb_syn_cavlc.h" #include "vlc_encoder.h" #include "cpu_core.h" -#include "array_stack_align.h" namespace WelsSVCEnc { SCoeffFunc sCoeffFunc; diff --git a/codec/encoder/core/src/svc_encode_mb.cpp b/codec/encoder/core/src/svc_encode_mb.cpp index 1349bee7..1ca30866 100644 --- a/codec/encoder/core/src/svc_encode_mb.cpp +++ b/codec/encoder/core/src/svc_encode_mb.cpp @@ -42,7 +42,6 @@ #include "encode_mb_aux.h" #include "decode_mb_aux.h" #include "ls_defines.h" -#include "array_stack_align.h" namespace WelsSVCEnc { void WelsDctMb (int16_t* pRes, uint8_t* pEncMb, int32_t iEncStride, uint8_t* pBestPred, PDctFunc pfDctFourT4) { diff --git a/codec/encoder/core/src/svc_motion_estimate.cpp b/codec/encoder/core/src/svc_motion_estimate.cpp index 70561514..9ff45e3c 100644 --- a/codec/encoder/core/src/svc_motion_estimate.cpp +++ b/codec/encoder/core/src/svc_motion_estimate.cpp @@ -41,7 +41,6 @@ #include "svc_motion_estimate.h" -#include "array_stack_align.h" namespace WelsSVCEnc { /*!