yuv: rationalize the C/SSE2 function naming

+ add the missing SSE2 YUV444 converter targets (ARGB, RGB565, RGBA4444) and
  register them in WebPInitYUV444ConvertersSSE2()

Change-Id: Ib575f7ada2a0ed7309cddd238f8bfc0e8999f145
This commit is contained in:
Pascal Massimino 2017-04-21 00:45:27 -07:00
parent 52245424b0
commit f768218966
4 changed files with 89 additions and 78 deletions

View File

@ -93,13 +93,13 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
}
// All variants implemented.
UPSAMPLE_FUNC(UpsampleRgbLinePair, VP8YuvToRgb, 3)
UPSAMPLE_FUNC(UpsampleBgrLinePair, VP8YuvToBgr, 3)
UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2)
UPSAMPLE_FUNC(UpsampleRgbLinePair_C, VP8YuvToRgb, 3)
UPSAMPLE_FUNC(UpsampleBgrLinePair_C, VP8YuvToBgr, 3)
UPSAMPLE_FUNC(UpsampleRgbaLinePair_C, VP8YuvToRgba, 4)
UPSAMPLE_FUNC(UpsampleBgraLinePair_C, VP8YuvToBgra, 4)
UPSAMPLE_FUNC(UpsampleArgbLinePair_C, VP8YuvToArgb, 4)
UPSAMPLE_FUNC(UpsampleRgba4444LinePair_C, VP8YuvToRgba4444, 2)
UPSAMPLE_FUNC(UpsampleRgb565LinePair_C, VP8YuvToRgb565, 2)
#undef LOAD_UV
#undef UPSAMPLE_FUNC
@ -161,13 +161,13 @@ void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \
}
YUV444_FUNC(WebPYuv444ToRgbC, VP8YuvToRgb, 3)
YUV444_FUNC(WebPYuv444ToBgrC, VP8YuvToBgr, 3)
YUV444_FUNC(WebPYuv444ToRgbaC, VP8YuvToRgba, 4)
YUV444_FUNC(WebPYuv444ToBgraC, VP8YuvToBgra, 4)
YUV444_FUNC(WebPYuv444ToArgbC, VP8YuvToArgb, 4)
YUV444_FUNC(WebPYuv444ToRgba4444C, VP8YuvToRgba4444, 2)
YUV444_FUNC(WebPYuv444ToRgb565C, VP8YuvToRgb565, 2)
YUV444_FUNC(WebPYuv444ToRgb_C, VP8YuvToRgb, 3)
YUV444_FUNC(WebPYuv444ToBgr_C, VP8YuvToBgr, 3)
YUV444_FUNC(WebPYuv444ToRgba_C, VP8YuvToRgba, 4)
YUV444_FUNC(WebPYuv444ToBgra_C, VP8YuvToBgra, 4)
YUV444_FUNC(WebPYuv444ToArgb_C, VP8YuvToArgb, 4)
YUV444_FUNC(WebPYuv444ToRgba4444_C, VP8YuvToRgba4444, 2)
YUV444_FUNC(WebPYuv444ToRgb565_C, VP8YuvToRgb565, 2)
#undef YUV444_FUNC
@ -182,17 +182,17 @@ static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 =
WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) {
if (upsampling_last_cpuinfo_used1 == VP8GetCPUInfo) return;
WebPYUV444Converters[MODE_RGB] = WebPYuv444ToRgbC;
WebPYUV444Converters[MODE_RGBA] = WebPYuv444ToRgbaC;
WebPYUV444Converters[MODE_BGR] = WebPYuv444ToBgrC;
WebPYUV444Converters[MODE_BGRA] = WebPYuv444ToBgraC;
WebPYUV444Converters[MODE_ARGB] = WebPYuv444ToArgbC;
WebPYUV444Converters[MODE_RGBA_4444] = WebPYuv444ToRgba4444C;
WebPYUV444Converters[MODE_RGB_565] = WebPYuv444ToRgb565C;
WebPYUV444Converters[MODE_rgbA] = WebPYuv444ToRgbaC;
WebPYUV444Converters[MODE_bgrA] = WebPYuv444ToBgraC;
WebPYUV444Converters[MODE_Argb] = WebPYuv444ToArgbC;
WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444C;
WebPYUV444Converters[MODE_RGB] = WebPYuv444ToRgb_C;
WebPYUV444Converters[MODE_RGBA] = WebPYuv444ToRgba_C;
WebPYUV444Converters[MODE_BGR] = WebPYuv444ToBgr_C;
WebPYUV444Converters[MODE_BGRA] = WebPYuv444ToBgra_C;
WebPYUV444Converters[MODE_ARGB] = WebPYuv444ToArgb_C;
WebPYUV444Converters[MODE_RGBA_4444] = WebPYuv444ToRgba4444_C;
WebPYUV444Converters[MODE_RGB_565] = WebPYuv444ToRgb565_C;
WebPYUV444Converters[MODE_rgbA] = WebPYuv444ToRgba_C;
WebPYUV444Converters[MODE_bgrA] = WebPYuv444ToBgra_C;
WebPYUV444Converters[MODE_Argb] = WebPYuv444ToArgb_C;
WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444_C;
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
@ -224,17 +224,17 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
if (upsampling_last_cpuinfo_used2 == VP8GetCPUInfo) return;
#ifdef FANCY_UPSAMPLING
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair;
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair;
WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair;
WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair;
WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair;
WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair_C;
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair_C;
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair_C;
WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair_C;
WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair_C;
WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair_C;
WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair_C;
WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair_C;
WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair_C;
WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair_C;
WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair_C;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {

View File

@ -121,10 +121,10 @@ static void Upsample32Pixels(const uint8_t r1[], const uint8_t r2[],
#define CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, \
top_dst, bottom_dst, cur_x) do { \
FUNC##32(top_y + (cur_x), r_u, r_v, top_dst + (cur_x) * XSTEP); \
FUNC##32_SSE2(top_y + (cur_x), r_u, r_v, top_dst + (cur_x) * XSTEP); \
if (bottom_y != NULL) { \
FUNC##32(bottom_y + (cur_x), r_u + 64, r_v + 64, \
bottom_dst + (cur_x) * XSTEP); \
FUNC##32_SSE2(bottom_y + (cur_x), r_u + 64, r_v + 64, \
bottom_dst + (cur_x) * XSTEP); \
} \
} while (0)
@ -213,29 +213,40 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE2(void) {
extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
extern void WebPInitYUV444ConvertersSSE2(void);
#define YUV444_FUNC(FUNC_NAME, CALL, XSTEP) \
extern void WebP##FUNC_NAME##C(const uint8_t* y, const uint8_t* u, \
const uint8_t* v, uint8_t* dst, int len); \
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) \
extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
uint8_t* dst, int len); \
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
uint8_t* dst, int len) { \
int i; \
const int max_len = len & ~31; \
for (i = 0; i < max_len; i += 32) CALL(y + i, u + i, v + i, dst + i * XSTEP);\
if (i < len) { /* C-fallback */ \
WebP##FUNC_NAME##C(y + i, u + i, v + i, dst + i * XSTEP, len - i); \
CALL_C(y + i, u + i, v + i, dst + i * XSTEP, len - i); \
} \
}
YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba32, 4);
YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra32, 4);
YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb32, 3);
YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr32, 3);
// SSE2 YUV444 converters. Each generated function runs the 32-pixel
// VP8YuvTo*32_SSE2 kernel over the first (len & ~31) pixels and forwards the
// remaining tail to the matching plain-C converter.
// YUV444_FUNC expands to a complete function definition, so the invocations
// take no trailing ';' (a stray ';' at file scope is not valid ISO C and
// triggers -Wpedantic).
YUV444_FUNC(Yuv444ToRgba_SSE2, VP8YuvToRgba32_SSE2, WebPYuv444ToRgba_C, 4)
YUV444_FUNC(Yuv444ToBgra_SSE2, VP8YuvToBgra32_SSE2, WebPYuv444ToBgra_C, 4)
YUV444_FUNC(Yuv444ToRgb_SSE2, VP8YuvToRgb32_SSE2, WebPYuv444ToRgb_C, 3)
YUV444_FUNC(Yuv444ToBgr_SSE2, VP8YuvToBgr32_SSE2, WebPYuv444ToBgr_C, 3)
YUV444_FUNC(Yuv444ToArgb_SSE2, VP8YuvToArgb32_SSE2, WebPYuv444ToArgb_C, 4)
YUV444_FUNC(Yuv444ToRgba4444_SSE2, VP8YuvToRgba444432_SSE2,
            WebPYuv444ToRgba4444_C, 2)
YUV444_FUNC(Yuv444ToRgb565_SSE2, VP8YuvToRgb56532_SSE2, WebPYuv444ToRgb565_C, 2)
// Installs the SSE2 YUV444 converters generated by YUV444_FUNC into the
// global WebPYUV444Converters[] table, indexed by MODE_* constant.
// Takes no arguments and returns nothing; its only effect is overwriting
// the table entries listed below.
WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE2(void) {
WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba;
WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra;
WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb;
WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr;
WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba_SSE2;
WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra_SSE2;
WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb_SSE2;
WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr_SSE2;
WebPYUV444Converters[MODE_ARGB] = Yuv444ToArgb_SSE2;
WebPYUV444Converters[MODE_RGBA_4444] = Yuv444ToRgba4444_SSE2;
WebPYUV444Converters[MODE_RGB_565] = Yuv444ToRgb565_SSE2;
// The lower-case mode names (rgbA, bgrA, Argb, rgbA_4444) are mapped to the
// same converter as their upper-case counterpart above.
WebPYUV444Converters[MODE_rgbA] = Yuv444ToRgba_SSE2;
WebPYUV444Converters[MODE_bgrA] = Yuv444ToBgra_SSE2;
WebPYUV444Converters[MODE_Argb] = Yuv444ToArgb_SSE2;
WebPYUV444Converters[MODE_rgbA_4444] = Yuv444ToRgba4444_SSE2;
}
#else

View File

@ -166,20 +166,20 @@ void VP8YUVInit(void);
#if defined(WEBP_USE_SSE2)
// Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst.
void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
void VP8YuvToArgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
void VP8YuvToRgba444432(const uint8_t* y, const uint8_t* u, const uint8_t* v,
void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
void VP8YuvToRgb56532(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
const uint8_t* v, uint8_t* dst);
void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst);
#endif // WEBP_USE_SSE2

View File

@ -186,8 +186,8 @@ static WEBP_INLINE void PlanarTo24b(__m128i* const in0, __m128i* const in1,
_mm_storeu_si128((__m128i*)(rgb + 80), *in5);
}
void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
const __m128i kAlpha = _mm_set1_epi16(255);
int n;
for (n = 0; n < 32; n += 8, dst += 32) {
@ -197,8 +197,8 @@ void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
}
}
void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
const __m128i kAlpha = _mm_set1_epi16(255);
int n;
for (n = 0; n < 32; n += 8, dst += 32) {
@ -208,8 +208,8 @@ void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
}
}
void VP8YuvToArgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
const __m128i kAlpha = _mm_set1_epi16(255);
int n;
for (n = 0; n < 32; n += 8, dst += 32) {
@ -219,8 +219,8 @@ void VP8YuvToArgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
}
}
void VP8YuvToRgba444432(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
const uint8_t* v, uint8_t* dst) {
const __m128i kAlpha = _mm_set1_epi16(255);
int n;
for (n = 0; n < 32; n += 8, dst += 16) {
@ -230,8 +230,8 @@ void VP8YuvToRgba444432(const uint8_t* y, const uint8_t* u, const uint8_t* v,
}
}
void VP8YuvToRgb56532(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
int n;
for (n = 0; n < 32; n += 8, dst += 16) {
__m128i R, G, B;
@ -240,8 +240,8 @@ void VP8YuvToRgb56532(const uint8_t* y, const uint8_t* u, const uint8_t* v,
}
}
void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
__m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;
@ -262,8 +262,8 @@ void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
PlanarTo24b(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
}
void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) {
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
__m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;