Remove global function pointers in the encoder MC code

This is similar to how this is done in the decoder part of MC.
This commit is contained in:
Martin Storsjö 2014-06-29 17:43:28 +03:00
parent 130c682c29
commit e5609bc0fe

View File

@ -81,18 +81,6 @@ static const uint8_t g_kuiABCD[8][8][4] = { ////g_kuiA[dy][dx], g_kuiB[dy][dx],
{4, 4, 28, 28}, {3, 5, 21, 35}, {2, 6, 14, 42}, {1, 7, 7, 49}
}
};
typedef void (*WelsMcFunc0) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
typedef void (*WelsMcFunc1) (uint8_t* pDst, int32_t iDstStride, const uint8_t* psrcA, int32_t iSrcAStride,
const uint8_t* pSrcB,
int32_t iSrcBStride, int32_t iHeight);
WelsMcFunc0 McCopyWidthEq16 = NULL;
WelsMcFunc0 McCopyWidthEq8 = NULL;
WelsMcFunc0 McCopyWidthEq4 = NULL;
WelsMcFunc0 pfMcHorVer02WidthEq16 = NULL;
WelsMcFunc1 pfPixelAvgWidthEq16 = NULL;
WelsMcFunc0 pfMcHorVer20WidthEq16 = NULL;
WelsMcFunc0 pfMcHorVer22WidthEq16 = NULL;
//***************************************************************************//
// C code implementation //
@ -224,97 +212,97 @@ static inline void McHorVer22WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStrid
/////////////////////luma MC//////////////////////////
static inline void McHorVer01WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer01WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
static inline void McHorVer03WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer03WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
}
static inline void McHorVer10WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer10WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
static inline void McHorVer11WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer11WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
pfMcHorVer02WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer12WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer12WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer13WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer13WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
pfMcHorVer02WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer21WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer21WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer23WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer23WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer30WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer30WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
}
static inline void McHorVer31WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer31WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer32WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer32WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer33WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
static inline void McHorVer33WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
@ -360,15 +348,15 @@ static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_
pDst += iDstStride;
}
}
static inline void McCopy (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
int32_t iHeight) {
static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
int32_t iHeight) {
int32_t i;
if (iWidth == 16 && McCopyWidthEq16 != NULL)
McCopyWidthEq16 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
else if (iWidth == 8 && McCopyWidthEq8 != NULL)
McCopyWidthEq8 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
else if (iWidth == 4 && McCopyWidthEq4 != NULL)
McCopyWidthEq4 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
if (iWidth == 16)
McCopyWidthEq16_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
else if (iWidth == 8)
McCopyWidthEq8_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
else if (iWidth == 4)
McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
else {
for (i = 0; i < iHeight; i++) {
memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
@ -386,7 +374,7 @@ void McChroma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t
const int32_t kiDy = mv.iMvY & 0x07;
if (0 == kiDx && 0 == kiDy) {
McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
} else {
const int32_t kiDA = g_kuiABCD[kiDy][kiDx][0];
const int32_t kiDB = g_kuiABCD[kiDy][kiDx][1];
@ -441,6 +429,117 @@ void McHorVer22Width9Or17Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStri
McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight);
}
static inline void McHorVer01WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
static inline void McHorVer03WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
}
static inline void McHorVer10WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
static inline void McHorVer11WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer12WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer13WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer21WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer23WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer30WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
}
static inline void McHorVer31WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer32WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer33WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight) {
int32_t i;
if (iWidth == 16)
McCopyWidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
else if (iWidth == 8)
McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
else if (iWidth == 4)
McCopyWidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
else {
for (i = 0; i < iHeight; i++) {
memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
pDst += iDstStride;
pSrc += iSrcStride;
}
}
}
typedef void (*McChromaWidthEqx) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
const uint8_t* pABCD, int32_t iHeigh);
void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
@ -453,7 +552,7 @@ void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int3
};
if (0 == kiD8x && 0 == kiD8y) {
McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
} else {
kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
}
@ -469,7 +568,7 @@ void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int
McChromaWidthEq8_ssse3
};
if (0 == kiD8x && 0 == kiD8y) {
McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
} else {
kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
}
@ -671,17 +770,17 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
static const PixelAvgFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
McCopyWidthEq16_c, McHorVer10WidthEq16, McHorVer20WidthEq16_c, McHorVer30WidthEq16,
McHorVer01WidthEq16, McHorVer11WidthEq16, McHorVer21WidthEq16, McHorVer31WidthEq16,
McHorVer02WidthEq16_c, McHorVer12WidthEq16, McHorVer22WidthEq16_c, McHorVer32WidthEq16,
McHorVer03WidthEq16, McHorVer13WidthEq16, McHorVer23WidthEq16, McHorVer33WidthEq16
McCopyWidthEq16_c, McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c,
McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c,
McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c,
McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c
};
#if defined (X86_ASM)
static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
McCopyWidthEq16_sse2, McHorVer10WidthEq16, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16,
McHorVer01WidthEq16, McHorVer11WidthEq16, McHorVer21WidthEq16, McHorVer31WidthEq16,
McHorVer02WidthEq16_sse2, McHorVer12WidthEq16, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16,
McHorVer03WidthEq16, McHorVer13WidthEq16, McHorVer23WidthEq16, McHorVer33WidthEq16
McCopyWidthEq16_sse2, McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2,
McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2,
McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2,
McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2
};
#endif
#if defined(HAVE_NEON)
@ -705,13 +804,6 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22_c;
memcpy (pFuncList->sMcFuncs.pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc));
pFuncList->sMcFuncs.pfChromaMc = McChroma_c;
McCopyWidthEq4 = McCopyWidthEq4_c;
McCopyWidthEq8 = McCopyWidthEq8_c;
McCopyWidthEq16 = McCopyWidthEq16_c;
pfPixelAvgWidthEq16 = PixelAvgWidthEq16_c;
pfMcHorVer02WidthEq16 = McHorVer02WidthEq16_c;
pfMcHorVer20WidthEq16 = McHorVer20WidthEq16_c;
pfMcHorVer22WidthEq16 = McHorVer22WidthEq16_c;
memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16, sizeof (pWelsMcFuncWidthEq16));
#if defined (X86_ASM)
if (uiCpuFlag & WELS_CPU_SSE2) {
@ -721,13 +813,6 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
pFuncList->sMcFuncs.pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
pFuncList->sMcFuncs.pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
pFuncList->sMcFuncs.pfChromaMc = McChroma_sse2;
McCopyWidthEq4 = McCopyWidthEq4_mmx;
McCopyWidthEq8 = McCopyWidthEq8_mmx;
McCopyWidthEq16 = McCopyWidthEq16_sse2;
pfPixelAvgWidthEq16 = PixelAvgWidthEq16_sse2;
pfMcHorVer02WidthEq16 = McHorVer02WidthEq16_sse2;
pfMcHorVer20WidthEq16 = McHorVer20WidthEq16_sse2;
pfMcHorVer22WidthEq16 = McHorVer22WidthEq16_sse2;
memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_sse2, sizeof (pWelsMcFuncWidthEq16_sse2));
}