Merge pull request #255 from mstorsjo/unify-stack-align-macros
Remove the array_stack_align.h header from the encoder lib
commit 914650abc6
@@ -1432,10 +1432,6 @@
 Name="Header Files"
 Filter="h;hpp;hxx;hm;inl"
 >
-<File
-RelativePath="..\..\..\encoder\core\inc\array_stack_align.h"
->
-</File>
 <File
 RelativePath="..\..\..\encoder\core\inc\as264_common.h"
 >
@@ -184,7 +184,7 @@ void_t DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, i
 #ifdef X86_ASM
 extern "C" {
 void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
-FORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16);

 DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
 DeblockLumaLt4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc);
@@ -192,7 +192,7 @@ extern "C" {
 }

 void DeblockLumaEq4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-FORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16);

 DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
 DeblockLumaEq4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta);
@@ -45,16 +45,16 @@
 #include "typedefs.h"

 /*
-* FORCE_STACK_ALIGN_1D: force 1 dimension local data aligned in stack
+* ENFORCE_STACK_ALIGN_1D: force 1 dimension local data aligned in stack
 * _tp: type
 * _nm: var name
 * _sz: size
 * _al: align bytes
 * auxiliary var: _nm ## _tEmP
 */
-#define FORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \
+#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \
 _tp _nm ## _tEmP[(_sz)+(_al)-1]; \
-_tp *_nm = _nm ## _tEmP + ((_al)-1) - (((uintptr_t)(_nm ## _tEmP + ((_al)-1)) & ((_al)-1))/sizeof(_tp))
+_tp *_nm = _nm ## _tEmP + ((_al)-1) - (((uintptr_t)(_nm ## _tEmP + ((_al)-1)) & ((_al)-1))/sizeof(_tp));


 #define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \
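For reference, a minimal stand-alone sketch of what the renamed macro guarantees: the ENFORCE_STACK_ALIGN_1D body below is copied from the hunk above, while the surrounding main(), the asserts and the printf are illustrative only and not part of this patch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

// Copied from the decoder macro header shown above: over-allocate by _al-1
// elements, then step the pointer back to the previous _al-byte boundary.
#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \
_tp _nm ## _tEmP[(_sz)+(_al)-1]; \
_tp *_nm = _nm ## _tEmP + ((_al)-1) - (((uintptr_t)(_nm ## _tEmP + ((_al)-1)) & ((_al)-1))/sizeof(_tp));

int main() {
  // Same request the SSE deblocking helpers above make: 16*8 bytes of scratch,
  // 16-byte aligned so it can be used with aligned SSE loads/stores.
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16);
  assert (((uintptr_t) uiBuf & 15) == 0);                    // 16-byte aligned
  assert (uiBuf >= uiBuf_tEmP);                              // still inside
  assert (uiBuf + 16 * 8 <= uiBuf_tEmP + 16 * 8 + 16 - 1);   // the backing array
  printf ("aligned scratch at %p\n", (void*) uiBuf);
  return 0;
}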
@@ -145,7 +145,7 @@ static const uint8_t g_kuiTableBIdx[2][8] = {

 void_t inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
 uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
-FORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4);
+ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4);

 uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
 uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
@@ -181,7 +181,7 @@ void_t static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t n
 int32_t iMbXy) {
 uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
 int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
-FORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
+ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);

 uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
 uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
@@ -269,7 +269,7 @@ void_t FilteringEdgeLumaH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iS
 int32_t iIndexA;
 int32_t iAlpha;
 int32_t iBeta;
-FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
+ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);

 GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
 iBeta);
@@ -286,7 +286,7 @@ void_t FilteringEdgeLumaV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iS
 int32_t iIndexA;
 int32_t iAlpha;
 int32_t iBeta;
-FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
+ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);

 GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
 iBeta);
@@ -331,7 +331,7 @@ void_t FilteringEdgeChromaH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_
 int32_t iIndexA;
 int32_t iAlpha;
 int32_t iBeta;
-FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
+ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);

 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
 iBeta);
@@ -347,7 +347,7 @@ void_t FilteringEdgeChromaV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_
 int32_t iIndexA;
 int32_t iAlpha;
 int32_t iBeta;
-FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
+ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);

 GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
 iBeta);
@@ -483,8 +483,8 @@ void_t FilteringEdgeLumaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, in
 int32_t iCurQp;
 int32_t iIndexA, iAlpha, iBeta;

-FORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
-FORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
+ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);

 pDestY = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4);
 iCurQp = pCurDqLayer->pLumaQp[iMbXyIndex];
@@ -531,8 +531,8 @@ void_t FilteringEdgeChromaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter,
 int32_t iCurQp;
 int32_t iIndexA, iAlpha, iBeta;

-FORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
-FORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
+ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);

 pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSize + iMbX) << 3);
 pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSize + iMbX) << 3);
@@ -492,7 +492,7 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
 int32_t iNMbMode, i;
 uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;

-FORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);

 pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
 pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
@@ -553,7 +553,7 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
 memset (pCurLayer->pNzc[iMbXy], 16, sizeof (pCurLayer->pNzc[iMbXy])); //JVT-x201wcm1.doc, page229, 2009.10.23
 return 0;
 } else if (0 == uiMbType) { //reference to JM
-FORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
 pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
 pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
 if (pCtx->pParseIntra4x4ModeFunc (&sNeighAvail, pIntraPredMode, pBs, pCurLayer)) {
@@ -770,7 +770,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
 int32_t iNMbMode, i;
 uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;

-FORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
 pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23

 uiMbType = BsGetUe (pBs);
@@ -859,7 +859,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
 return 0;
 } else {
 if (0 == uiMbType) {
-FORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
 pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
 pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
 if (pCtx->pParseIntra4x4ModeFunc (&sNeighAvail, pIntraPredMode, pBs, pCurLayer)) {
@@ -420,7 +420,7 @@ static inline void_t McHorVer22_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t

 static inline void_t McHorVer01_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
 PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
@@ -434,7 +434,7 @@ static inline void_t McHorVer01_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t
 }
 static inline void_t McHorVer03_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
 PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
@@ -448,7 +448,7 @@ static inline void_t McHorVer03_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t
 }
 static inline void_t McHorVer10_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
 PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
@@ -462,8 +462,8 @@ static inline void_t McHorVer10_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t
 }
 static inline void_t McHorVer11_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
 McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
@@ -480,8 +480,8 @@ static inline void_t McHorVer11_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t
 }
 static inline void_t McHorVer12_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
-FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
 McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
@@ -498,8 +498,8 @@ static inline void_t McHorVer12_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t
 }
 static inline void_t McHorVer13_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
 McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
@@ -516,8 +516,8 @@ static inline void_t McHorVer13_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t
 }
 static inline void_t McHorVer21_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
 McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
@@ -534,8 +534,8 @@ static inline void_t McHorVer21_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t
 }
 static inline void_t McHorVer23_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
 McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
@@ -552,7 +552,7 @@ static inline void_t McHorVer23_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t
 }
 static inline void_t McHorVer30_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
 PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight);
@@ -566,8 +566,8 @@ static inline void_t McHorVer30_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t
 }
 static inline void_t McHorVer31_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
 McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
@@ -584,8 +584,8 @@ static inline void_t McHorVer31_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t
 }
 static inline void_t McHorVer32_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
-FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
 McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
@@ -602,8 +602,8 @@ static inline void_t McHorVer32_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t
 }
 static inline void_t McHorVer33_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
 int32_t iWidth, int32_t iHeight) {
-FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
 if (iWidth == 16) {
 McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
 McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
@@ -666,4 +666,4 @@ void_t InitMcFunc (SMcFunc* pMcFunc, int32_t iCpu) {
 #endif //(X86_ASM)
 }

-} // namespace WelsDec
+} // namespace WelsDec
@@ -237,7 +237,7 @@ static inline void_t BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t
 uint8_t* pDstV = pMCRefMem->pDstV;
 bool_t bExpand = false;

-FORCE_STACK_ALIGN_1D (uint8_t, uiExpandBuf, (PADDING_LENGTH + 6) * (PADDING_LENGTH + 6), 16);
+ENFORCE_STACK_ALIGN_1D (uint8_t, uiExpandBuf, (PADDING_LENGTH + 6) * (PADDING_LENGTH + 6), 16);

 if (iFullMVx & 0x07) {
 iExpandWidth -= 3;
@@ -1,121 +0,0 @@
-/*!
-* \copy
-* Copyright (c) 2011-2013, Cisco Systems
-* All rights reserved.
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-* * Redistributions of source code must retain the above copyright
-* notice, this list of conditions and the following disclaimer.
-*
-* * Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in
-* the documentation and/or other materials provided with the
-* distribution.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-* POSSIBILITY OF SUCH DAMAGE.
-*
-*
-* \file array_stack_align.h
-*
-* \brief promised alignment of array pData declaration on stack
-* multidimensional array can be extended if applicable need
-*
-* \date 8/8/2011 Created
-* 8/12/2011 functionality implementation for multidimensional array
-* 8/26/2011 better solution with reducing extra memory used,
-* stack size is adaptively reduced by _tp & _al
-*
-*************************************************************************************
-*/
-#ifndef ARRAY_STACK_ALIGN_H__
-#define ARRAY_STACK_ALIGN_H__
-
-#include <assert.h>
-#include "typedefs.h"
-
-/*
-* ENFORCE_STACK_ALIGN_1D: force 1 dimension local pData aligned in stack
-* _tp: type
-* _nm: var name
-* _sz: size
-* _al: align bytes
-* auxiliary var: _nm ## _tEmP
-* NOTE: _al should be power-of-2 and >= sizeof(_tp), before considering to use such macro
-*/
-
-//#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \
-//_tp _nm ## _tEmP[(_sz)+(_al)-1]; \
-//_tp *_nm = _nm ## _tEmP + ((_al)-1); \
-//_nm -= (((int32_t)_nm & ((_al)-1))/sizeof(_tp));
-
-/* Another better solution with reducing extra memory used */
-#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \
-assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); /*_al should be power-of-2 and >= sizeof(_tp)*/\
-_tp _nm ## _tEmP[(_sz)+(_al)/sizeof(_tp)-1]; \
-_tp *_nm = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \
-_nm -= (((uintptr_t)_nm & ((_al)-1))/sizeof(_tp));
-
-/*
-* ENFORCE_STACK_ALIGN_2D: force 2 dimension local pData aligned in stack
-* _tp: type
-* _nm: var name
-* _cx, _cy: size in x, y dimension
-* _al: align bytes
-* auxiliary var: _nm ## _tEmP, _nm ## _tEmP_al
-* NOTE: _al should be power-of-2 and >= sizeof(_tp), before considering to use such macro
-*/
-
-//#define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \
-//_tp _nm ## _tEmP[(_cx)*(_cy)+(_al)-1]; \
-//_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)-1); \
-//_nm ## _tEmP_al -= (((int32_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \
-//_tp (*_nm)[(_cy)] = (_tp (*)[(_cy)])_nm ## _tEmP_al;
-
-/* Another better solution with reducing extra memory used */
-#define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \
-assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); /*_al should be power-of-2 and >= sizeof(_tp)*/\
-_tp _nm ## _tEmP[(_cx)*(_cy)+(_al)/sizeof(_tp)-1]; \
-_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \
-_nm ## _tEmP_al -= (((uintptr_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \
-_tp (*_nm)[(_cy)] = (_tp (*)[(_cy)])_nm ## _tEmP_al;
-
-/*
-* ENFORCE_STACK_ALIGN_3D: force 3 dimension local pData aligned in stack
-* _tp: type
-* _nm: var name
-* _cx, _cy, _cz: size in x, y, z dimension
-* _al: align bytes
-* auxiliary var: _nm ## _tEmP, _nm ## _tEmP_al
-* NOTE: _al should be power-of-2 and >= sizeof(_tp), before considering to use such macro
-*/
-
-//#define ENFORCE_STACK_ALIGN_3D(_tp, _nm, _cx, _cy, _cz, _al) \
-//_tp _nm ## _tEmP[(_cx)*(_cy)*(_cz)+(_al)-1]; \
-//_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)-1); \
-//_nm ## _tEmP_al -= (((int32_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \
-//_tp (*_nm)[(_cy)][(_cz)] = (_tp (*)[(_cy)][(_cz)])_nm ## _tEmP_al;
-
-/* Another better solution with reducing extra memory used */
-#define ENFORCE_STACK_ALIGN_3D(_tp, _nm, _cx, _cy, _cz, _al) \
-assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); /*_al should be power-of-2 and >= sizeof(_tp)*/\
-_tp _nm ## _tEmP[(_cx)*(_cy)*(_cz)+(_al)/sizeof(_tp)-1]; \
-_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \
-_nm ## _tEmP_al -= (((int32_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \
-_tp (*_nm)[(_cy)][(_cz)] = (_tp (*)[(_cy)][(_cz)])_nm ## _tEmP_al;
-
-#endif//ARRAY_STACK_ALIGN_H__
-
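The 2D and 3D variants documented in the header deleted above work the same way but additionally rebuild an array-of-rows pointer. A hypothetical stand-alone sketch of the 2D form follows; the macro body is taken from the deleted header (with its inline comment dropped), while the coefficient block, its dimensions and main() are made up for illustration and are not part of this patch.

#include <assert.h>
#include <stdint.h>

// From the deleted header above: validate _al, over-allocate, step the pointer
// back to an _al-byte boundary, then cast it to a pointer-to-row type.
#define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \
assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); \
_tp _nm ## _tEmP[(_cx)*(_cy)+(_al)/sizeof(_tp)-1]; \
_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \
_nm ## _tEmP_al -= (((uintptr_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \
_tp (*_nm)[(_cy)] = (_tp (*)[(_cy)])_nm ## _tEmP_al;

int main() {
  // Hypothetical 4x16 block of int16_t coefficients on a 16-byte boundary;
  // pCoeff then indexes like a normal 2D array: pCoeff[row][col].
  ENFORCE_STACK_ALIGN_2D (int16_t, pCoeff, 4, 16, 16);
  assert (((uintptr_t) pCoeff & 15) == 0);
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 16; ++j)
      pCoeff[i][j] = (int16_t) (i * 16 + j);
  return pCoeff[3][15] == 63 ? 0 : 1;
}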
@@ -40,7 +40,6 @@

 #include "deblocking.h"
 #include "cpu_core.h"
-#include "array_stack_align.h"

 namespace WelsSVCEnc {

@@ -51,7 +51,6 @@
 #include "ref_list_mgr_svc.h"
 #include "ls_defines.h"
 #include "crt_util_safe_x.h" // Safe CRT routines like utils for cross platforms
-#include "array_stack_align.h"
 #if defined(MT_ENABLED)
 #include "slice_multi_threading.h"
 #endif//MT_ENABLED
@@ -42,7 +42,6 @@
 #include "ls_defines.h"
 #include "cpu_core.h"
 #include "get_intra_predictor.h"
-#include "array_stack_align.h"

 namespace WelsSVCEnc {
 #define I4x4_COUNT 4
@@ -40,7 +40,6 @@

 #include "mc.h"
 #include "cpu_core.h"
-#include "array_stack_align.h"

 namespace WelsSVCEnc {
 /*------------------weight for chroma fraction pixel interpolation------------------*/
@@ -42,7 +42,6 @@
 #include "md.h"
 #include "cpu_core.h"
 #include "svc_enc_golomb.h"
-#include "array_stack_align.h"

 namespace WelsSVCEnc {
 #define INTRA_VARIANCE_SAD_THRESHOLD 150
@@ -41,7 +41,6 @@
 #include "set_mb_syn_cavlc.h"
 #include "vlc_encoder.h"
 #include "cpu_core.h"
-#include "array_stack_align.h"

 namespace WelsSVCEnc {
 SCoeffFunc sCoeffFunc;
@@ -42,7 +42,6 @@
 #include "encode_mb_aux.h"
 #include "decode_mb_aux.h"
 #include "ls_defines.h"
-#include "array_stack_align.h"

 namespace WelsSVCEnc {
 void WelsDctMb (int16_t* pRes, uint8_t* pEncMb, int32_t iEncStride, uint8_t* pBestPred, PDctFunc pfDctFourT4) {
@@ -41,7 +41,6 @@


 #include "svc_motion_estimate.h"
-#include "array_stack_align.h"

 namespace WelsSVCEnc {
 /*!