MIPS: dspr2: added optimization for function MakeARGB32

Calls to the inline function MakeARGB32 are replaced with calls
through function pointers to functions that pack (A)RGB for an
entire row.

Change-Id: Ia4bd4be171a46c1e1821e408b073ff5791c587a9
This commit is contained in:
Djordje Pesut 2014-11-20 16:53:05 +01:00
parent 87c3d53180
commit 7ce8788b06
9 changed files with 197 additions and 11 deletions

View File

@ -35,6 +35,8 @@ LOCAL_SRC_FILES := \
src/dsp/alpha_processing.c \ src/dsp/alpha_processing.c \
src/dsp/alpha_processing_mips_dsp_r2.c \ src/dsp/alpha_processing_mips_dsp_r2.c \
src/dsp/alpha_processing_sse2.c \ src/dsp/alpha_processing_sse2.c \
src/dsp/argb.c \
src/dsp/argb_mips_dsp_r2.c \
src/dsp/cpu.c \ src/dsp/cpu.c \
src/dsp/dec.c \ src/dsp/dec.c \
src/dsp/dec_clip_tables.c \ src/dsp/dec_clip_tables.c \

View File

@ -180,6 +180,8 @@ DSP_DEC_OBJS = \
$(DIROBJ)\dsp\alpha_processing.obj \ $(DIROBJ)\dsp\alpha_processing.obj \
$(DIROBJ)\dsp\alpha_processing_mips_dsp_r2.obj \ $(DIROBJ)\dsp\alpha_processing_mips_dsp_r2.obj \
$(DIROBJ)\dsp\alpha_processing_sse2.obj \ $(DIROBJ)\dsp\alpha_processing_sse2.obj \
$(DIROBJ)\dsp\argb.obj \
$(DIROBJ)\dsp\argb_mips_dsp_r2.obj \
$(DIROBJ)\dsp\cpu.obj \ $(DIROBJ)\dsp\cpu.obj \
$(DIROBJ)\dsp\dec.obj \ $(DIROBJ)\dsp\dec.obj \
$(DIROBJ)\dsp\dec_clip_tables.obj \ $(DIROBJ)\dsp\dec_clip_tables.obj \

View File

@ -110,6 +110,8 @@ DSP_DEC_OBJS = \
src/dsp/alpha_processing.o \ src/dsp/alpha_processing.o \
src/dsp/alpha_processing_mips_dsp_r2.o \ src/dsp/alpha_processing_mips_dsp_r2.o \
src/dsp/alpha_processing_sse2.o \ src/dsp/alpha_processing_sse2.o \
src/dsp/argb.o \
src/dsp/argb_mips_dsp_r2.o \
src/dsp/cpu.o \ src/dsp/cpu.o \
src/dsp/dec.o \ src/dsp/dec.o \
src/dsp/dec_clip_tables.o \ src/dsp/dec_clip_tables.o \

View File

@ -11,6 +11,8 @@ commondir = $(includedir)/webp
COMMON_SOURCES = COMMON_SOURCES =
COMMON_SOURCES += alpha_processing.c COMMON_SOURCES += alpha_processing.c
COMMON_SOURCES += alpha_processing_mips_dsp_r2.c COMMON_SOURCES += alpha_processing_mips_dsp_r2.c
COMMON_SOURCES += argb.c
COMMON_SOURCES += argb_mips_dsp_r2.c
COMMON_SOURCES += cpu.c COMMON_SOURCES += cpu.c
COMMON_SOURCES += dec.c COMMON_SOURCES += dec.c
COMMON_SOURCES += dec_clip_tables.c COMMON_SOURCES += dec_clip_tables.c

57
src/dsp/argb.c Normal file
View File

@ -0,0 +1,57 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// ARGB making functions.
//
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
// Combines four channel values into one 32-bit word laid out as
// (a << 24) | (r << 16) | (g << 8) | b.
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
  // Alpha is widened before shifting so the shift into bit 31 is done on an
  // unsigned value.
  uint32_t argb = (uint32_t)a << 24;
  argb |= r << 16;
  argb |= g << 8;
  argb |= b;
  return argb;
}
// Packs 'len' pixels into 'out', one 32-bit word per pixel.
// The sample for pixel x is read at byte offset x * step from each of the
// 'a', 'r', 'g' and 'b' pointers and combined with MakeARGB32().
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
                     const uint8_t* b, int len, int step, uint32_t* out) {
  int src = 0;  // offset of the current pixel, shared by all four channels
  int x = 0;
  while (x < len) {
    const int alpha = a[src];
    const int red = r[src];
    const int green = g[src];
    const int blue = b[src];
    out[x] = MakeARGB32(alpha, red, green, blue);
    src += step;
    ++x;
  }
}
// Packs 'len' pixels into 'out', one 32-bit word per pixel, with the alpha
// byte forced to 0xff.
// The sample for pixel x is read at byte offset x * step from each of the
// 'r', 'g' and 'b' pointers and combined with MakeARGB32().
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
                    int len, int step, uint32_t* out) {
  int src = 0;  // offset of the current pixel, shared by all three channels
  int x = 0;
  while (x < len) {
    const int red = r[src];
    const int green = g[src];
    const int blue = b[src];
    out[x] = MakeARGB32(0xff, red, green, blue);
    src += step;
    ++x;
  }
}
// Row-packing entry points. Both are left without an initializer here and are
// assigned by VP8EncDspARGBInit().
void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g,
                    const uint8_t* b, int len, int step, uint32_t* out);
void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
                   int len, int step, uint32_t* out);
extern void VP8EncDspARGBInitMIPSdspR2(void);

// Points VP8PackARGB / VP8PackRGB at the plain-C implementations, then lets a
// platform-specific initializer replace them when one is compiled in and the
// CPU reports support for it.
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
  // Portable defaults.
  VP8PackRGB = PackRGB;
  VP8PackARGB = PackARGB;

  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
#if defined(WEBP_USE_MIPS_DSP_R2)
  if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kMIPSdspR2)) {
    VP8EncDspARGBInitMIPSdspR2();
  }
#endif
}

107
src/dsp/argb_mips_dsp_r2.c Normal file
View File

@ -0,0 +1,107 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// ARGB making functions (mips version).
//
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
// MIPS DSP R2 version of the ARGB row packer.
// Writes 'len' 32-bit words to 'out'. For each pixel, one byte is loaded from
// each of 'a', 'r', 'g' and 'b' at a common byte offset that starts at 0 and
// grows by 'step' per pixel; the four bytes are packed as
// (a << 24) | (r << 16) | (g << 8) | b.
// The main loop stores one word per iteration and stops at 'loop_end', which
// excludes the last pixel when 'len' is odd; that pixel is handled by the
// tail code after label 0.
// NOTE(review): assumes len >= 0 -- confirm against callers.
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
const uint8_t* b, int len, int step, uint32_t* out) {
int temp0, temp1, temp2, temp3, offset;
// 1 when 'len' is odd, 0 otherwise.
const int rest = len & 1;
// End of the region written by the main loop.
const uint32_t* const loop_end = out + len - rest;
__asm__ volatile (
// offset = 0; skip the main loop if it has nothing to write.
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
// Load one byte per channel from base + offset.
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
// temp1 = (a << 16) | r, temp3 = (g << 16) | b.
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
// Keep the low byte of each halfword: temp0 = a:r:g:b.
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
// 'out' was already advanced, hence the -4 displacement.
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
// Tail: pack one more pixel only when 'len' is odd.
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
// Temporaries and 'offset' are early-clobber outputs; 'out' is read-write.
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
[loop_end]"r"(loop_end), [rest]"r"(rest)
: "memory"
);
}
// MIPS DSP R2 version of the RGB row packer.
// Writes 'len' 32-bit words to 'out'. For each pixel, one byte is loaded from
// each of 'r', 'g' and 'b' at a common byte offset that starts at 0 and grows
// by 'step' per pixel; the bytes are packed with a constant 0xff alpha as
// (0xff << 24) | (r << 16) | (g << 8) | b.
// The main loop stores one word per iteration and stops at 'loop_end', which
// excludes the last pixel when 'len' is odd; that pixel is handled by the
// tail code after label 0.
// NOTE(review): assumes len >= 0 -- confirm against callers.
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out) {
int temp0, temp1, temp2, offset;
// 1 when 'len' is odd, 0 otherwise.
const int rest = len & 1;
// Constant alpha value inserted into every output word.
const int a = 0xff;
// End of the region written by the main loop.
const uint32_t* const loop_end = out + len - rest;
__asm__ volatile (
// offset = 0; skip the main loop if it has nothing to write.
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
// Load one byte per channel from base + offset.
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
// temp0 = (0xff << 16) | r, temp2 = (g << 16) | b.
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
// Keep the low byte of each halfword: temp0 = 0xff:r:g:b.
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
// 'out' was already advanced, hence the -4 displacement.
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
// Tail: pack one more pixel only when 'len' is odd.
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
// Temporaries and 'offset' are early-clobber outputs; 'out' is read-write.
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[offset]"=&r"(offset), [out]"+&r"(out)
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
[loop_end]"r"(loop_end), [rest]"r"(rest)
: "memory"
);
}
#endif // WEBP_USE_MIPS_DSP_R2
//------------------------------------------------------------------------------
// Entry point
extern WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void);

// Points VP8PackARGB / VP8PackRGB at this file's implementations when the
// file is built with WEBP_USE_MIPS_DSP_R2; otherwise the body is empty and
// the pointers are left untouched.
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) {
#if defined(WEBP_USE_MIPS_DSP_R2)
  VP8PackRGB = PackRGB;
  VP8PackARGB = PackARGB;
#endif  // WEBP_USE_MIPS_DSP_R2
}

View File

@ -337,6 +337,16 @@ void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse);
// To be called first before using the above. // To be called first before using the above.
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void); WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void);
// ARGB making functions.
// Each call packs one row of 'len' pixels into 'out', one 32-bit word per
// pixel. 'step' is the distance, in bytes, between consecutive samples of the
// same channel.
// VP8PackARGB takes a separate pointer for each of the a, r, g and b channels.
extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r,
const uint8_t* g, const uint8_t* b, int len,
int step, uint32_t* out);
// VP8PackRGB takes no alpha input.
extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out);
// To be called first before using the above.
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
#endif #endif

View File

@ -15,6 +15,7 @@
#include <stdlib.h> #include <stdlib.h>
#include "./vp8enci.h" #include "./vp8enci.h"
#include "../dsp/dsp.h"
#include "../utils/utils.h" #include "../utils/utils.h"
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -38,6 +39,7 @@ int WebPPictureInitInternal(WebPPicture* picture, int version) {
memset(picture, 0, sizeof(*picture)); memset(picture, 0, sizeof(*picture));
picture->writer = DummyWriter; picture->writer = DummyWriter;
WebPEncodingSetError(picture, VP8_ENC_OK); WebPEncodingSetError(picture, VP8_ENC_OK);
VP8EncDspARGBInit();
} }
return 1; return 1;
} }

View File

@ -32,10 +32,6 @@ static const union {
} test_endian = { 0xff000000u }; } test_endian = { 0xff000000u };
#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff) #define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
}
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Detection of non-trivial transparency // Detection of non-trivial transparency
@ -1065,13 +1061,19 @@ static int Import(WebPPicture* const picture,
if (!WebPPictureAlloc(picture)) return 0; if (!WebPPictureAlloc(picture)) return 0;
assert(step >= (import_alpha ? 4 : 3)); assert(step >= (import_alpha ? 4 : 3));
if (import_alpha) {
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
uint32_t* const dst = &picture->argb[y * picture->argb_stride]; uint32_t* const dst = &picture->argb[y * picture->argb_stride];
int x; const int offset = y * rgb_stride;
for (x = 0; x < width; ++x) { VP8PackARGB(a_ptr + offset, r_ptr + offset, g_ptr + offset,
const int offset = step * x + y * rgb_stride; b_ptr + offset, width, step, dst);
dst[x] = MakeARGB32(import_alpha ? a_ptr[offset] : 0xff, }
r_ptr[offset], g_ptr[offset], b_ptr[offset]); } else {
for (y = 0; y < height; ++y) {
uint32_t* const dst = &picture->argb[y * picture->argb_stride];
const int offset = y * rgb_stride;
VP8PackRGB(r_ptr + offset, g_ptr + offset, b_ptr + offset,
width, step, dst);
} }
} }
return 1; return 1;