From f0103595dd25de56e6b7b086ecd614098725425b Mon Sep 17 00:00:00 2001 From: Djordje Pesut Date: Tue, 2 Sep 2014 16:34:52 +0200 Subject: [PATCH] MIPS: dspr2: added optimization for ColorIndexInverseTransforms Change-Id: I5b6094ce489d4f896bc4b8f575142eb3c5054beb --- Android.mk | 1 + Makefile.vc | 1 + makefile.unix | 1 + src/dsp/Makefile.am | 1 + src/dsp/lossless.c | 61 ++++++++++--------- src/dsp/lossless.h | 31 ++++++++++ src/dsp/lossless_mips_dsp_r2.c | 106 +++++++++++++++++++++++++++++++++ 7 files changed, 173 insertions(+), 29 deletions(-) create mode 100644 src/dsp/lossless_mips_dsp_r2.c diff --git a/Android.mk b/Android.mk index 79fa0f5f..72f17564 100644 --- a/Android.mk +++ b/Android.mk @@ -49,6 +49,7 @@ LOCAL_SRC_FILES := \ src/dsp/filters_mips_dsp_r2.c \ src/dsp/lossless.c \ src/dsp/lossless_mips32.c \ + src/dsp/lossless_mips_dsp_r2.c \ src/dsp/lossless_neon.$(NEON) \ src/dsp/lossless_sse2.c \ src/dsp/upsampling.c \ diff --git a/Makefile.vc b/Makefile.vc index 9c76fc8f..8c73ef6c 100644 --- a/Makefile.vc +++ b/Makefile.vc @@ -183,6 +183,7 @@ DSP_DEC_OBJS = \ $(DIROBJ)\dsp\filters_mips_dsp_r2.obj \ $(DIROBJ)\dsp\lossless.obj \ $(DIROBJ)\dsp\lossless_mips32.obj \ + $(DIROBJ)\dsp\lossless_mips_dsp_r2.obj \ $(DIROBJ)\dsp\lossless_neon.obj \ $(DIROBJ)\dsp\lossless_sse2.obj \ $(DIROBJ)\dsp\upsampling.obj \ diff --git a/makefile.unix b/makefile.unix index bcc3a699..771c2559 100644 --- a/makefile.unix +++ b/makefile.unix @@ -119,6 +119,7 @@ DSP_DEC_OBJS = \ src/dsp/filters_mips_dsp_r2.o \ src/dsp/lossless.o \ src/dsp/lossless_mips32.o \ + src/dsp/lossless_mips_dsp_r2.o \ src/dsp/lossless_neon.o \ src/dsp/lossless_sse2.o \ src/dsp/upsampling.o \ diff --git a/src/dsp/Makefile.am b/src/dsp/Makefile.am index cbc759d4..651f923d 100644 --- a/src/dsp/Makefile.am +++ b/src/dsp/Makefile.am @@ -21,6 +21,7 @@ COMMON_SOURCES += filters_mips_dsp_r2.c COMMON_SOURCES += lossless.c COMMON_SOURCES += lossless.h COMMON_SOURCES += lossless_mips32.c +COMMON_SOURCES += 
lossless_mips_dsp_r2.c COMMON_SOURCES += lossless_neon.c COMMON_SOURCES += neon.h COMMON_SOURCES += upsampling.c diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c index a1bf3584..084c9230 100644 --- a/src/dsp/lossless.c +++ b/src/dsp/lossless.c @@ -1184,9 +1184,21 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform, // Separate out pixels packed together using pixel-bundling. // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t). -#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \ -void FUNC_NAME(const VP8LTransform* const transform, \ - int y_start, int y_end, const TYPE* src, TYPE* dst) { \ +#define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX, \ + GET_INDEX, GET_VALUE) \ +static void F_NAME(const TYPE* src, const uint32_t* const color_map, \ + TYPE* dst, int y_start, int y_end, int width) { \ + int y; \ + for (y = y_start; y < y_end; ++y) { \ + int x; \ + for (x = 0; x < width; ++x) { \ + *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \ + } \ + } \ +} \ +STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform, \ + int y_start, int y_end, const TYPE* src, \ + TYPE* dst) { \ int y; \ const int bits_per_pixel = 8 >> transform->bits_; \ const int width = transform->xsize_; \ @@ -1209,35 +1221,14 @@ void FUNC_NAME(const VP8LTransform* const transform, \ } \ } \ } else { \ - for (y = y_start; y < y_end; ++y) { \ - int x; \ - for (x = 0; x < width; ++x) { \ - *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \ - } \ - } \ + VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width); \ } \ } -static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) { - return (idx >> 8) & 0xff; -} - -static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) { - return idx; -} - -static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) { - return val; -} - -static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) { - return (val >> 8) & 0xff; -} - -static 
COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex, - GetARGBValue) -COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex, - GetAlphaValue) +COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b, + VP8GetARGBIndex, VP8GetARGBValue) +COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t, + 8b, VP8GetAlphaIndex, VP8GetAlphaValue) #undef COLOR_INDEX_INVERSE @@ -1586,9 +1577,13 @@ VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount; VP8LHistogramAddFunc VP8LHistogramAdd; +VP8LMapARGBFunc VP8LMapColor32b; +VP8LMapAlphaFunc VP8LMapColor8b; + extern void VP8LDspInitSSE2(void); extern void VP8LDspInitNEON(void); extern void VP8LDspInitMIPS32(void); +extern void VP8LDspInitMIPSdspR2(void); void VP8LDspInit(void) { memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors)); @@ -1616,6 +1611,9 @@ void VP8LDspInit(void) { VP8LHistogramAdd = HistogramAdd; + VP8LMapColor32b = MapARGB; + VP8LMapColor8b = MapAlpha; + // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { #if defined(WEBP_USE_SSE2) @@ -1632,6 +1630,11 @@ void VP8LDspInit(void) { if (VP8GetCPUInfo(kMIPS32)) { VP8LDspInitMIPS32(); } +#endif +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + VP8LDspInitMIPSdspR2(); + } #endif } } diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h index 8c7551c9..69c8ac69 100644 --- a/src/dsp/lossless.h +++ b/src/dsp/lossless.h @@ -163,6 +163,37 @@ typedef void (*VP8LHistogramAddFunc)(const VP8LHistogram* const a, VP8LHistogram* const out); extern VP8LHistogramAddFunc VP8LHistogramAdd; +// ----------------------------------------------------------------------------- +// color mapping related functions. 
+/* Returns bits 8..15 of idx; the 32-bit color-map routines use that byte of a source pixel as the color_map index. */ +static WEBP_INLINE uint32_t VP8GetARGBIndex(uint32_t idx) { + return (idx >> 8) & 0xff; +} +/* Identity: an 8-bit source sample is itself the color_map index. */ +static WEBP_INLINE uint8_t VP8GetAlphaIndex(uint8_t idx) { + return idx; +} +/* Identity: a 32-bit color_map entry is the output value as-is. */ +static WEBP_INLINE uint32_t VP8GetARGBValue(uint32_t val) { + return val; +} +/* Returns bits 8..15 of a color_map entry, narrowed to 8 bits. */ +static WEBP_INLINE uint8_t VP8GetAlphaValue(uint32_t val) { + return (val >> 8) & 0xff; +} +/* Function-pointer types for the color-map routines: src and dst are the input and output buffers, color_map is the lookup table, and y_start, y_end and width bound the rows and the row length processed. */ +typedef void (*VP8LMapARGBFunc)(const uint32_t* src, + const uint32_t* const color_map, + uint32_t* dst, int y_start, + int y_end, int width); +typedef void (*VP8LMapAlphaFunc)(const uint8_t* src, + const uint32_t* const color_map, + uint8_t* dst, int y_start, + int y_end, int width); +/* Pointers to the selected implementations; only declared here, defined and assigned outside this header. */ +extern VP8LMapARGBFunc VP8LMapColor32b; +extern VP8LMapAlphaFunc VP8LMapColor8b; + // ----------------------------------------------------------------------------- // PrefixEncode() diff --git a/src/dsp/lossless_mips_dsp_r2.c b/src/dsp/lossless_mips_dsp_r2.c new file mode 100644 index 00000000..507f7366 --- /dev/null +++ b/src/dsp/lossless_mips_dsp_r2.c @@ -0,0 +1,106 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Image transforms and color space conversion methods for lossless decoder. 
+// +// Author(s): Djordje Pesut (djordje.pesut@imgtec.com) +// Jovan Zelincevic (jovan.zelincevic@imgtec.com) + +#include "./dsp.h" + +#if defined(WEBP_USE_MIPS_DSP_R2) + +#include "./lossless.h" +/* MAP_COLOR_FUNCS defines a static FUNC_NAME(src, color_map, dst, y_start, y_end, width). For every row from y_start up to (not including) y_end it reads width consecutive TYPE elements from src and writes one color_map-derived TYPE element to dst for each; src and dst keep advancing across rows. Groups of four elements go through the inline asm, where assembler .ifc tests on the stringized TYPE select byte loads and stores (uint8_t) or word loads and stores (uint32_t). The index is the source byte itself, or bits 8..15 of the source word; it is shifted left by 2 to form the byte offset for the lwx load from color_map, and the uint8_t path stores bits 8..15 of the loaded entry. GET_INDEX and GET_VALUE are used only by the trailing C loop, which handles the remaining (width & 3) elements of each row; the asm hard-codes the bit extraction instead of calling them. */ +#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \ +static void FUNC_NAME(const TYPE* src, \ + const uint32_t* const color_map, \ + TYPE* dst, int y_start, int y_end, \ + int width) { \ + int y; \ + for (y = y_start; y < y_end; ++y) { \ + int x; \ + for (x = 0; x < (width >> 2); ++x) { \ + int tmp1, tmp2, tmp3, tmp4; \ + __asm__ volatile ( \ + ".ifc "#TYPE", uint8_t \n\t" \ + "lbu %[tmp1], 0(%[src]) \n\t" \ + "lbu %[tmp2], 1(%[src]) \n\t" \ + "lbu %[tmp3], 2(%[src]) \n\t" \ + "lbu %[tmp4], 3(%[src]) \n\t" \ + "addiu %[src], %[src], 4 \n\t" \ + ".endif \n\t" \ + ".ifc "#TYPE", uint32_t \n\t" \ + "lw %[tmp1], 0(%[src]) \n\t" \ + "lw %[tmp2], 4(%[src]) \n\t" \ + "lw %[tmp3], 8(%[src]) \n\t" \ + "lw %[tmp4], 12(%[src]) \n\t" \ + "ext %[tmp1], %[tmp1], 8, 8 \n\t" \ + "ext %[tmp2], %[tmp2], 8, 8 \n\t" \ + "ext %[tmp3], %[tmp3], 8, 8 \n\t" \ + "ext %[tmp4], %[tmp4], 8, 8 \n\t" \ + "addiu %[src], %[src], 16 \n\t" \ + ".endif \n\t" \ + "sll %[tmp1], %[tmp1], 2 \n\t" \ + "sll %[tmp2], %[tmp2], 2 \n\t" \ + "sll %[tmp3], %[tmp3], 2 \n\t" \ + "sll %[tmp4], %[tmp4], 2 \n\t" \ + "lwx %[tmp1], %[tmp1](%[color_map]) \n\t" \ + "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \ + "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \ + "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \ + ".ifc "#TYPE", uint8_t \n\t" \ + "ext %[tmp1], %[tmp1], 8, 8 \n\t" \ + "ext %[tmp2], %[tmp2], 8, 8 \n\t" \ + "ext %[tmp3], %[tmp3], 8, 8 \n\t" \ + "ext %[tmp4], %[tmp4], 8, 8 \n\t" \ + "sb %[tmp1], 0(%[dst]) \n\t" \ + "sb %[tmp2], 1(%[dst]) \n\t" \ + "sb %[tmp3], 2(%[dst]) \n\t" \ + "sb %[tmp4], 3(%[dst]) \n\t" \ + "addiu %[dst], %[dst], 4 \n\t" \ + ".endif \n\t" \ + ".ifc "#TYPE", uint32_t \n\t" \ + "sw %[tmp1], 0(%[dst]) \n\t" \ + "sw %[tmp2], 4(%[dst]) \n\t" \ + "sw %[tmp3], 8(%[dst]) \n\t" \ + "sw %[tmp4], 
12(%[dst]) \n\t" \ + "addiu %[dst], %[dst], 16 \n\t" \ + ".endif \n\t" \ + : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), \ + [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst) \ + : [color_map]"r"(color_map) \ + : "memory" \ + ); \ + } \ + for (x = 0; x < (width & 3); ++x) { \ + *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \ + } \ + } \ +} +/* Instantiate the 32-bit (MapARGB) and 8-bit (MapAlpha) variants. */ +MAP_COLOR_FUNCS(MapARGB, uint32_t, VP8GetARGBIndex, VP8GetARGBValue) +MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue) + +#undef MAP_COLOR_FUNCS + +#endif // WEBP_USE_MIPS_DSP_R2 + +//------------------------------------------------------------------------------ + +extern void VP8LDspInitMIPSdspR2(void); +/* When WEBP_USE_MIPS_DSP_R2 is defined, stores MapARGB and MapAlpha into the VP8LMapColor32b and VP8LMapColor8b pointers; otherwise the body is empty. */ +void VP8LDspInitMIPSdspR2(void) { +#if defined(WEBP_USE_MIPS_DSP_R2) + VP8LMapColor32b = MapARGB; + VP8LMapColor8b = MapAlpha; +#endif // WEBP_USE_MIPS_DSP_R2 +} + +//------------------------------------------------------------------------------