SSE2: add yuv444 converters, re-using yuv_sse2.c

Change-Id: I4d5c9df8a4c8e8cb8b5daa537af07382894503a8
This commit is contained in:
skal 2015-08-17 21:15:37 -07:00
parent 41a5d99d55
commit bd55604d1b
4 changed files with 63 additions and 26 deletions

View File

@ -153,25 +153,28 @@ WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
// YUV444 converter // YUV444 converter
#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \ #define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \ extern void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
uint8_t* dst, int len); \
void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
uint8_t* dst, int len) { \ uint8_t* dst, int len) { \
int i; \ int i; \
for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \ for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \
} }
YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb, 3) YUV444_FUNC(WebPYuv444ToRgbC, VP8YuvToRgb, 3)
YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr, 3) YUV444_FUNC(WebPYuv444ToBgrC, VP8YuvToBgr, 3)
YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba, 4) YUV444_FUNC(WebPYuv444ToRgbaC, VP8YuvToRgba, 4)
YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra, 4) YUV444_FUNC(WebPYuv444ToBgraC, VP8YuvToBgra, 4)
YUV444_FUNC(Yuv444ToArgb, VP8YuvToArgb, 4) YUV444_FUNC(WebPYuv444ToArgbC, VP8YuvToArgb, 4)
YUV444_FUNC(Yuv444ToRgba4444, VP8YuvToRgba4444, 2) YUV444_FUNC(WebPYuv444ToRgba4444C, VP8YuvToRgba4444, 2)
YUV444_FUNC(Yuv444ToRgb565, VP8YuvToRgb565, 2) YUV444_FUNC(WebPYuv444ToRgb565C, VP8YuvToRgb565, 2)
#undef YUV444_FUNC #undef YUV444_FUNC
WebPYUV444Converter WebPYUV444Converters[MODE_LAST]; WebPYUV444Converter WebPYUV444Converters[MODE_LAST];
extern void WebPInitYUV444ConvertersMIPSdspR2(void); extern void WebPInitYUV444ConvertersMIPSdspR2(void);
extern void WebPInitYUV444ConvertersSSE2(void);
static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 = static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 =
(VP8CPUInfo)&upsampling_last_cpuinfo_used1; (VP8CPUInfo)&upsampling_last_cpuinfo_used1;
@ -179,19 +182,24 @@ static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 =
WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) { WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) {
if (upsampling_last_cpuinfo_used1 == VP8GetCPUInfo) return; if (upsampling_last_cpuinfo_used1 == VP8GetCPUInfo) return;
WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb; WebPYUV444Converters[MODE_RGB] = WebPYuv444ToRgbC;
WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba; WebPYUV444Converters[MODE_RGBA] = WebPYuv444ToRgbaC;
WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr; WebPYUV444Converters[MODE_BGR] = WebPYuv444ToBgrC;
WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra; WebPYUV444Converters[MODE_BGRA] = WebPYuv444ToBgraC;
WebPYUV444Converters[MODE_ARGB] = Yuv444ToArgb; WebPYUV444Converters[MODE_ARGB] = WebPYuv444ToArgbC;
WebPYUV444Converters[MODE_RGBA_4444] = Yuv444ToRgba4444; WebPYUV444Converters[MODE_RGBA_4444] = WebPYuv444ToRgba4444C;
WebPYUV444Converters[MODE_RGB_565] = Yuv444ToRgb565; WebPYUV444Converters[MODE_RGB_565] = WebPYuv444ToRgb565C;
WebPYUV444Converters[MODE_rgbA] = Yuv444ToRgba; WebPYUV444Converters[MODE_rgbA] = WebPYuv444ToRgbaC;
WebPYUV444Converters[MODE_bgrA] = Yuv444ToBgra; WebPYUV444Converters[MODE_bgrA] = WebPYuv444ToBgraC;
WebPYUV444Converters[MODE_Argb] = Yuv444ToArgb; WebPYUV444Converters[MODE_Argb] = WebPYuv444ToArgbC;
WebPYUV444Converters[MODE_rgbA_4444] = Yuv444ToRgba4444; WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444C;
if (VP8GetCPUInfo != NULL) { if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
WebPInitYUV444ConvertersSSE2();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2) #if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) { if (VP8GetCPUInfo(kMIPSdspR2)) {
WebPInitYUV444ConvertersMIPSdspR2(); WebPInitYUV444ConvertersMIPSdspR2();

View File

@ -201,6 +201,41 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE2(void) {
#endif // FANCY_UPSAMPLING #endif // FANCY_UPSAMPLING
//------------------------------------------------------------------------------
// Table of YUV444 row converters, indexed by MODE_xxx. The array itself is
// defined in another translation unit (with MODE_LAST entries); it is only
// declared here so that the SSE2 init function can overwrite some entries.
extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
// Installs the SSE2 implementations into WebPYUV444Converters[].
extern void WebPInitYUV444ConvertersSSE2(void);
// Generates a static row converter
//   FUNC_NAME(y, u, v, dst, len)
// that converts 'len' YUV444 samples into packed output, using XSTEP bytes
// per output pixel.
//  - Pixels are handled in spans of 32 by CALL(y, u, v, dst).
//  - Whatever is left after the last full span of 32 (len - max_len pixels) is
//    forwarded to the plain-C converter WebP<FUNC_NAME>C, which is declared
//    'extern' by the macro itself.
// The expansion ends with the closing brace of the generated function, so the
// macro must be instantiated WITHOUT a trailing ';': a stray ';' at file scope
// is an empty declaration, which strict ISO C rejects (-Wpedantic warning).
#define YUV444_FUNC(FUNC_NAME, CALL, XSTEP)                                    \
extern void WebP##FUNC_NAME##C(const uint8_t* y, const uint8_t* u,             \
                               const uint8_t* v, uint8_t* dst, int len);       \
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
                      uint8_t* dst, int len) {                                 \
  int i;                                                                       \
  const int max_len = len & ~31;  /* len rounded down to a multiple of 32 */   \
  for (i = 0; i < max_len; i += 32) {                                          \
    CALL(y + i, u + i, v + i, dst + i * (XSTEP));                              \
  }                                                                            \
  if (i < len) {  /* C-fallback */                                             \
    WebP##FUNC_NAME##C(y + i, u + i, v + i, dst + i * (XSTEP), len - i);       \
  }                                                                            \
}

YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba32, 4)
YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra32, 4)
YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb32, 3)
YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr32, 3)
WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE2(void) {
VP8YUVInitSSE2();
WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba;
WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra;
WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb;
WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr;
}
#else
WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersSSE2)
#endif // WEBP_USE_SSE2 #endif // WEBP_USE_SSE2
#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_SSE2)) #if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_SSE2))

View File

@ -249,7 +249,6 @@ void VP8YUVInit(void);
// to the binary size. Otherwise, they are initialized at run-time (small cost). // to the binary size. Otherwise, they are initialized at run-time (small cost).
#define WEBP_YUV_USE_SSE2_TABLES #define WEBP_YUV_USE_SSE2_TABLES
#if defined(FANCY_UPSAMPLING)
// Process 32 pixels and store the result (24b or 32b per pixel) in *dst. // Process 32 pixels and store the result (24b or 32b per pixel) in *dst.
void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v, void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst); uint8_t* dst);
@ -259,7 +258,6 @@ void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst); uint8_t* dst);
void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v, void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst); uint8_t* dst);
#endif // FANCY_UPSAMPLING
// Must be called to initialize tables before using the functions. // Must be called to initialize tables before using the functions.
void VP8YUVInitSSE2(void); void VP8YUVInitSSE2(void);

View File

@ -125,8 +125,6 @@ static WEBP_INLINE void YuvToBgrSSE2(uint8_t y, uint8_t u, uint8_t v,
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Convert spans of 32 pixels to various RGB formats for the fancy upsampler. // Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
#ifdef FANCY_UPSAMPLING
void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v, void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
uint8_t* dst) { uint8_t* dst) {
int n; int n;
@ -186,8 +184,6 @@ void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
memcpy(dst + n * 3, tmp, 2 * 3); memcpy(dst + n * 3, tmp, 2 * 3);
} }
#endif // FANCY_UPSAMPLING
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Arbitrary-length row conversion functions // Arbitrary-length row conversion functions