Remove global function pointers in the encoder MC code
This mirrors the approach used in the decoder's MC code.
This commit is contained in:
parent
130c682c29
commit
e5609bc0fe
@ -81,18 +81,6 @@ static const uint8_t g_kuiABCD[8][8][4] = { ////g_kuiA[dy][dx], g_kuiB[dy][dx],
|
||||
{4, 4, 28, 28}, {3, 5, 21, 35}, {2, 6, 14, 42}, {1, 7, 7, 49}
|
||||
}
|
||||
};
|
||||
typedef void (*WelsMcFunc0) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight);
|
||||
typedef void (*WelsMcFunc1) (uint8_t* pDst, int32_t iDstStride, const uint8_t* psrcA, int32_t iSrcAStride,
|
||||
const uint8_t* pSrcB,
|
||||
int32_t iSrcBStride, int32_t iHeight);
|
||||
WelsMcFunc0 McCopyWidthEq16 = NULL;
|
||||
WelsMcFunc0 McCopyWidthEq8 = NULL;
|
||||
WelsMcFunc0 McCopyWidthEq4 = NULL;
|
||||
WelsMcFunc0 pfMcHorVer02WidthEq16 = NULL;
|
||||
WelsMcFunc1 pfPixelAvgWidthEq16 = NULL;
|
||||
WelsMcFunc0 pfMcHorVer20WidthEq16 = NULL;
|
||||
WelsMcFunc0 pfMcHorVer22WidthEq16 = NULL;
|
||||
|
||||
//***************************************************************************//
|
||||
// C code implementation //
|
||||
@ -224,97 +212,97 @@ static inline void McHorVer22WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStrid
|
||||
|
||||
/////////////////////luma MC//////////////////////////
|
||||
|
||||
static inline void McHorVer01WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer01WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
|
||||
|
||||
pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
}
|
||||
static inline void McHorVer03WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer03WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
|
||||
|
||||
pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
}
|
||||
static inline void McHorVer10WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer10WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
|
||||
|
||||
pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
}
|
||||
static inline void McHorVer11WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer11WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
pfMcHorVer02WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
static inline void McHorVer12WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer12WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
static inline void McHorVer13WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer13WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
pfMcHorVer02WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
static inline void McHorVer21WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer21WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
static inline void McHorVer23WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer23WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
static inline void McHorVer30WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer30WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
|
||||
|
||||
pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
|
||||
}
|
||||
static inline void McHorVer31WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer31WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
static inline void McHorVer32WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer32WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
|
||||
pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
static inline void McHorVer33WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
static inline void McHorVer33WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
|
||||
static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
@ -360,15 +348,15 @@ static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_
|
||||
pDst += iDstStride;
|
||||
}
|
||||
}
|
||||
static inline void McCopy (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
|
||||
int32_t iHeight) {
|
||||
static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
|
||||
int32_t iHeight) {
|
||||
int32_t i;
|
||||
if (iWidth == 16 && McCopyWidthEq16 != NULL)
|
||||
McCopyWidthEq16 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
else if (iWidth == 8 && McCopyWidthEq8 != NULL)
|
||||
McCopyWidthEq8 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
else if (iWidth == 4 && McCopyWidthEq4 != NULL)
|
||||
McCopyWidthEq4 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
if (iWidth == 16)
|
||||
McCopyWidthEq16_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
else if (iWidth == 8)
|
||||
McCopyWidthEq8_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
else if (iWidth == 4)
|
||||
McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
else {
|
||||
for (i = 0; i < iHeight; i++) {
|
||||
memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
|
||||
@ -386,7 +374,7 @@ void McChroma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t
|
||||
const int32_t kiDy = mv.iMvY & 0x07;
|
||||
|
||||
if (0 == kiDx && 0 == kiDy) {
|
||||
McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
|
||||
McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
|
||||
} else {
|
||||
const int32_t kiDA = g_kuiABCD[kiDy][kiDx][0];
|
||||
const int32_t kiDB = g_kuiABCD[kiDy][kiDx][1];
|
||||
@ -441,6 +429,117 @@ void McHorVer22Width9Or17Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStri
|
||||
McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight);
|
||||
}
|
||||
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=0, y=1 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Runs McHorVer02WidthEq16_sse2 (the x=0, y=2 entry) on pSrc into pTmp with a
// stride of 16, then PixelAvgWidthEq16_sse2 combines pSrc and pTmp into pDst.
// NOTE(review): pTmp has 256 elements = 16 * 16, so iHeight is presumably
// at most 16 -- confirm against callers.
static inline void McHorVer01WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
|
||||
|
||||
McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
}
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=0, y=3 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Runs McHorVer02WidthEq16_sse2 (the x=0, y=2 entry) on pSrc into pTmp with a
// stride of 16, then PixelAvgWidthEq16_sse2 combines pTmp with the source
// starting one row further down (pSrc + iSrcStride) into pDst.
// NOTE(review): pTmp has 256 elements = 16 * 16, so iHeight is presumably
// at most 16 -- confirm against callers.
static inline void McHorVer03WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
|
||||
|
||||
McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
}
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=1, y=0 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Runs McHorVer20WidthEq16_sse2 (the x=2, y=0 entry) on pSrc into pTmp with a
// stride of 16, then PixelAvgWidthEq16_sse2 combines pSrc and pTmp into pDst.
// NOTE(review): pTmp has 256 elements = 16 * 16, so iHeight is presumably
// at most 16 -- confirm against callers.
static inline void McHorVer10WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
|
||||
|
||||
McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
}
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=1, y=1 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Produces two intermediate blocks from pSrc -- McHorVer20WidthEq16_sse2 into
// pTmp[0..] and McHorVer02WidthEq16_sse2 into pTmp[256..], both with a stride
// of 16 -- then PixelAvgWidthEq16_sse2 combines the two into pDst.
// NOTE(review): each half of pTmp holds 256 elements = 16 * 16, so iHeight is
// presumably at most 16 -- confirm against callers.
static inline void McHorVer11WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=1, y=2 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Produces two intermediate blocks from pSrc -- McHorVer02WidthEq16_sse2 into
// pTmp[0..] and McHorVer22WidthEq16_sse2 into pTmp[256..], both with a stride
// of 16 -- then PixelAvgWidthEq16_sse2 combines the two into pDst.
// NOTE(review): each half of pTmp holds 256 elements = 16 * 16, so iHeight is
// presumably at most 16 -- confirm against callers.
static inline void McHorVer12WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=1, y=3 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Produces two intermediate blocks with a stride of 16:
// McHorVer20WidthEq16_sse2 applied one row down (pSrc + iSrcStride) into
// pTmp[0..], and McHorVer02WidthEq16_sse2 applied at pSrc into pTmp[256..].
// PixelAvgWidthEq16_sse2 then combines the two into pDst.
// NOTE(review): each half of pTmp holds 256 elements = 16 * 16, so iHeight is
// presumably at most 16 -- confirm against callers.
static inline void McHorVer13WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=2, y=1 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Produces two intermediate blocks from pSrc -- McHorVer20WidthEq16_sse2 into
// pTmp[0..] and McHorVer22WidthEq16_sse2 into pTmp[256..], both with a stride
// of 16 -- then PixelAvgWidthEq16_sse2 combines the two into pDst.
// NOTE(review): each half of pTmp holds 256 elements = 16 * 16, so iHeight is
// presumably at most 16 -- confirm against callers.
static inline void McHorVer21WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=2, y=3 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Produces two intermediate blocks with a stride of 16:
// McHorVer20WidthEq16_sse2 applied one row down (pSrc + iSrcStride) into
// pTmp[0..], and McHorVer22WidthEq16_sse2 applied at pSrc into pTmp[256..].
// PixelAvgWidthEq16_sse2 then combines the two into pDst.
// NOTE(review): each half of pTmp holds 256 elements = 16 * 16, so iHeight is
// presumably at most 16 -- confirm against callers.
static inline void McHorVer23WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=3, y=0 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Runs McHorVer20WidthEq16_sse2 (the x=2, y=0 entry) on pSrc into pTmp with a
// stride of 16, then PixelAvgWidthEq16_sse2 combines pTmp with the source
// shifted one sample to the right (pSrc + 1) into pDst.
// NOTE(review): pTmp has 256 elements = 16 * 16, so iHeight is presumably
// at most 16 -- confirm against callers.
static inline void McHorVer30WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
|
||||
|
||||
McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
|
||||
}
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=3, y=1 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Produces two intermediate blocks with a stride of 16:
// McHorVer20WidthEq16_sse2 applied at pSrc into pTmp[0..], and
// McHorVer02WidthEq16_sse2 applied one sample to the right (pSrc + 1) into
// pTmp[256..]. PixelAvgWidthEq16_sse2 then combines the two into pDst.
// NOTE(review): each half of pTmp holds 256 elements = 16 * 16, so iHeight is
// presumably at most 16 -- confirm against callers.
static inline void McHorVer31WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=3, y=2 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Produces two intermediate blocks with a stride of 16:
// McHorVer02WidthEq16_sse2 applied one sample to the right (pSrc + 1) into
// pTmp[0..], and McHorVer22WidthEq16_sse2 applied at pSrc into pTmp[256..].
// PixelAvgWidthEq16_sse2 then combines the two into pDst.
// NOTE(review): each half of pTmp holds 256 elements = 16 * 16, so iHeight is
// presumably at most 16 -- confirm against callers.
static inline void McHorVer32WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
// SSE2 luma quarter-pel MC for 16-wide blocks; x=3, y=3 entry of
// pWelsMcFuncWidthEq16_sse2 (same order as the C table annotated [y*4+x]).
// Produces two intermediate blocks with a stride of 16:
// McHorVer20WidthEq16_sse2 applied one row down (pSrc + iSrcStride) into
// pTmp[0..], and McHorVer02WidthEq16_sse2 applied one sample to the right
// (pSrc + 1) into pTmp[256..]. PixelAvgWidthEq16_sse2 then combines the two
// into pDst.
// NOTE(review): each half of pTmp holds 256 elements = 16 * 16, so iHeight is
// presumably at most 16 -- confirm against callers.
static inline void McHorVer33WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iHeight) {
|
||||
// Presumably declares pTmp as a 16-byte-aligned stack buffer (macro not shown here).
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
|
||||
|
||||
McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
|
||||
McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
|
||||
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
|
||||
}
|
||||
|
||||
// Copies an iWidth x iHeight block from pSrc (row pitch iSrcStride) to pDst
// (row pitch iDstStride), choosing the routine by width:
//   16 -> McCopyWidthEq16_sse2, 8 -> McCopyWidthEq8_mmx, 4 -> McCopyWidthEq4_mmx,
//   any other width -> one memcpy of iWidth bytes per row.
// Used by McChroma_sse2 and McChroma_ssse3 when both fractional MV components
// are zero.
static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
int32_t iWidth, int32_t iHeight) {
|
||||
int32_t i;
|
||||
if (iWidth == 16)
|
||||
McCopyWidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
else if (iWidth == 8)
|
||||
McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
else if (iWidth == 4)
|
||||
McCopyWidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||
else {
|
||||
// Generic fallback: copy row by row, stepping each pointer by its own stride.
for (i = 0; i < iHeight; i++) {
|
||||
memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
|
||||
pDst += iDstStride;
|
||||
pSrc += iSrcStride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef void (*McChromaWidthEqx) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
const uint8_t* pABCD, int32_t iHeigh);
|
||||
void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||
@ -453,7 +552,7 @@ void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int3
|
||||
};
|
||||
|
||||
if (0 == kiD8x && 0 == kiD8y) {
|
||||
McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
|
||||
McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
|
||||
} else {
|
||||
kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
|
||||
}
|
||||
@ -469,7 +568,7 @@ void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int
|
||||
McChromaWidthEq8_ssse3
|
||||
};
|
||||
if (0 == kiD8x && 0 == kiD8y) {
|
||||
McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
|
||||
McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
|
||||
} else {
|
||||
kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
|
||||
}
|
||||
@ -671,17 +770,17 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
|
||||
static const PixelAvgFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
|
||||
|
||||
static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
|
||||
McCopyWidthEq16_c, McHorVer10WidthEq16, McHorVer20WidthEq16_c, McHorVer30WidthEq16,
|
||||
McHorVer01WidthEq16, McHorVer11WidthEq16, McHorVer21WidthEq16, McHorVer31WidthEq16,
|
||||
McHorVer02WidthEq16_c, McHorVer12WidthEq16, McHorVer22WidthEq16_c, McHorVer32WidthEq16,
|
||||
McHorVer03WidthEq16, McHorVer13WidthEq16, McHorVer23WidthEq16, McHorVer33WidthEq16
|
||||
McCopyWidthEq16_c, McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c,
|
||||
McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c,
|
||||
McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c,
|
||||
McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c
|
||||
};
|
||||
#if defined (X86_ASM)
|
||||
static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
|
||||
McCopyWidthEq16_sse2, McHorVer10WidthEq16, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16,
|
||||
McHorVer01WidthEq16, McHorVer11WidthEq16, McHorVer21WidthEq16, McHorVer31WidthEq16,
|
||||
McHorVer02WidthEq16_sse2, McHorVer12WidthEq16, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16,
|
||||
McHorVer03WidthEq16, McHorVer13WidthEq16, McHorVer23WidthEq16, McHorVer33WidthEq16
|
||||
McCopyWidthEq16_sse2, McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2,
|
||||
McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2,
|
||||
McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2,
|
||||
McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2
|
||||
};
|
||||
#endif
|
||||
#if defined(HAVE_NEON)
|
||||
@ -705,13 +804,6 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
|
||||
pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22_c;
|
||||
memcpy (pFuncList->sMcFuncs.pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc));
|
||||
pFuncList->sMcFuncs.pfChromaMc = McChroma_c;
|
||||
McCopyWidthEq4 = McCopyWidthEq4_c;
|
||||
McCopyWidthEq8 = McCopyWidthEq8_c;
|
||||
McCopyWidthEq16 = McCopyWidthEq16_c;
|
||||
pfPixelAvgWidthEq16 = PixelAvgWidthEq16_c;
|
||||
pfMcHorVer02WidthEq16 = McHorVer02WidthEq16_c;
|
||||
pfMcHorVer20WidthEq16 = McHorVer20WidthEq16_c;
|
||||
pfMcHorVer22WidthEq16 = McHorVer22WidthEq16_c;
|
||||
memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16, sizeof (pWelsMcFuncWidthEq16));
|
||||
#if defined (X86_ASM)
|
||||
if (uiCpuFlag & WELS_CPU_SSE2) {
|
||||
@ -721,13 +813,6 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
|
||||
pFuncList->sMcFuncs.pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
|
||||
pFuncList->sMcFuncs.pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
|
||||
pFuncList->sMcFuncs.pfChromaMc = McChroma_sse2;
|
||||
McCopyWidthEq4 = McCopyWidthEq4_mmx;
|
||||
McCopyWidthEq8 = McCopyWidthEq8_mmx;
|
||||
McCopyWidthEq16 = McCopyWidthEq16_sse2;
|
||||
pfPixelAvgWidthEq16 = PixelAvgWidthEq16_sse2;
|
||||
pfMcHorVer02WidthEq16 = McHorVer02WidthEq16_sse2;
|
||||
pfMcHorVer20WidthEq16 = McHorVer20WidthEq16_sse2;
|
||||
pfMcHorVer22WidthEq16 = McHorVer22WidthEq16_sse2;
|
||||
memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_sse2, sizeof (pWelsMcFuncWidthEq16_sse2));
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user