MIPS: dspr2: added optimization for function MakeARGB32
Calls to the inline function MakeARGB32 are replaced by calls through function pointers to routines that build (A)RGB values for an entire row.
Change-Id: Ia4bd4be171a46c1e1821e408b073ff5791c587a9
This commit is contained in:
parent
87c3d53180
commit
7ce8788b06
@ -35,6 +35,8 @@ LOCAL_SRC_FILES := \
|
|||||||
src/dsp/alpha_processing.c \
|
src/dsp/alpha_processing.c \
|
||||||
src/dsp/alpha_processing_mips_dsp_r2.c \
|
src/dsp/alpha_processing_mips_dsp_r2.c \
|
||||||
src/dsp/alpha_processing_sse2.c \
|
src/dsp/alpha_processing_sse2.c \
|
||||||
|
src/dsp/argb.c \
|
||||||
|
src/dsp/argb_mips_dsp_r2.c \
|
||||||
src/dsp/cpu.c \
|
src/dsp/cpu.c \
|
||||||
src/dsp/dec.c \
|
src/dsp/dec.c \
|
||||||
src/dsp/dec_clip_tables.c \
|
src/dsp/dec_clip_tables.c \
|
||||||
|
@ -180,6 +180,8 @@ DSP_DEC_OBJS = \
|
|||||||
$(DIROBJ)\dsp\alpha_processing.obj \
|
$(DIROBJ)\dsp\alpha_processing.obj \
|
||||||
$(DIROBJ)\dsp\alpha_processing_mips_dsp_r2.obj \
|
$(DIROBJ)\dsp\alpha_processing_mips_dsp_r2.obj \
|
||||||
$(DIROBJ)\dsp\alpha_processing_sse2.obj \
|
$(DIROBJ)\dsp\alpha_processing_sse2.obj \
|
||||||
|
$(DIROBJ)\dsp\argb.obj \
|
||||||
|
$(DIROBJ)\dsp\argb_mips_dsp_r2.obj \
|
||||||
$(DIROBJ)\dsp\cpu.obj \
|
$(DIROBJ)\dsp\cpu.obj \
|
||||||
$(DIROBJ)\dsp\dec.obj \
|
$(DIROBJ)\dsp\dec.obj \
|
||||||
$(DIROBJ)\dsp\dec_clip_tables.obj \
|
$(DIROBJ)\dsp\dec_clip_tables.obj \
|
||||||
|
@ -110,6 +110,8 @@ DSP_DEC_OBJS = \
|
|||||||
src/dsp/alpha_processing.o \
|
src/dsp/alpha_processing.o \
|
||||||
src/dsp/alpha_processing_mips_dsp_r2.o \
|
src/dsp/alpha_processing_mips_dsp_r2.o \
|
||||||
src/dsp/alpha_processing_sse2.o \
|
src/dsp/alpha_processing_sse2.o \
|
||||||
|
src/dsp/argb.o \
|
||||||
|
src/dsp/argb_mips_dsp_r2.o \
|
||||||
src/dsp/cpu.o \
|
src/dsp/cpu.o \
|
||||||
src/dsp/dec.o \
|
src/dsp/dec.o \
|
||||||
src/dsp/dec_clip_tables.o \
|
src/dsp/dec_clip_tables.o \
|
||||||
|
@ -11,6 +11,8 @@ commondir = $(includedir)/webp
|
|||||||
COMMON_SOURCES =
|
COMMON_SOURCES =
|
||||||
COMMON_SOURCES += alpha_processing.c
|
COMMON_SOURCES += alpha_processing.c
|
||||||
COMMON_SOURCES += alpha_processing_mips_dsp_r2.c
|
COMMON_SOURCES += alpha_processing_mips_dsp_r2.c
|
||||||
|
COMMON_SOURCES += argb.c
|
||||||
|
COMMON_SOURCES += argb_mips_dsp_r2.c
|
||||||
COMMON_SOURCES += cpu.c
|
COMMON_SOURCES += cpu.c
|
||||||
COMMON_SOURCES += dec.c
|
COMMON_SOURCES += dec.c
|
||||||
COMMON_SOURCES += dec_clip_tables.c
|
COMMON_SOURCES += dec_clip_tables.c
|
||||||
|
57
src/dsp/argb.c
Normal file
57
src/dsp/argb.c
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Use of this source code is governed by a BSD-style license
|
||||||
|
// that can be found in the COPYING file in the root of the source
|
||||||
|
// tree. An additional intellectual property rights grant can be found
|
||||||
|
// in the file PATENTS. All contributing project authors may
|
||||||
|
// be found in the AUTHORS file in the root of the source tree.
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// ARGB making functions.
|
||||||
|
//
|
||||||
|
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||||
|
|
||||||
|
#include "./dsp.h"
|
||||||
|
|
||||||
|
// Builds one 32-bit pixel value: 'a' is shifted left by 24 bits, 'r' by 16,
// 'g' by 8, and 'b' is used as is; the four parts are OR-ed together.
// The components are not range-checked here.
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
  const uint32_t alpha = (uint32_t)a << 24;
  const int rgb = (r << 16) | (g << 8) | b;
  return alpha | rgb;
}
|
||||||
|
|
||||||
|
// Portable C row packer with alpha: writes out[0..len-1]. Sample n of each
// plane is read at byte index n * step, and the four samples are combined by
// MakeARGB32().
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
                     const uint8_t* b, int len, int step, uint32_t* out) {
  int n;
  for (n = 0; n < len; ++n) {
    const int pos = n * step;
    out[n] = MakeARGB32(a[pos], r[pos], g[pos], b[pos]);
  }
}
|
||||||
|
|
||||||
|
// Portable C row packer without an alpha plane: writes out[0..len-1]. Sample n
// of each plane is read at byte index n * step; the alpha argument passed to
// MakeARGB32() is always 0xff.
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
                    int len, int step, uint32_t* out) {
  int n;
  for (n = 0; n < len; ++n) {
    const int pos = n * step;
    out[n] = MakeARGB32(0xff, r[pos], g[pos], b[pos]);
  }
}
|
||||||
|
|
||||||
|
// Row-packing entry point with alpha. Assigned by VP8EncDspARGBInit(), which
// installs the C PackARGB() and may then let the MIPS DSP R2 init replace it.
// Arguments, in order: a, r, g, b planes, len, step, out.
void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*,
|
||||||
|
const uint8_t*, int, int, uint32_t*);
|
||||||
|
// Row-packing entry point without an alpha plane. Assigned by
// VP8EncDspARGBInit() in the same way as VP8PackARGB.
// Arguments, in order: r, g, b planes, len, step, out.
void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*,
|
||||||
|
int, int, uint32_t*);
|
||||||
|
|
||||||
|
// Init routine of the MIPS DSP R2 variant; called from VP8EncDspARGBInit()
// only when WEBP_USE_MIPS_DSP_R2 is defined and the CPU query reports support.
extern void VP8EncDspARGBInitMIPSdspR2(void);
|
||||||
|
|
||||||
|
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
|
||||||
|
VP8PackARGB = PackARGB;
|
||||||
|
VP8PackRGB = PackRGB;
|
||||||
|
|
||||||
|
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||||
|
if (VP8GetCPUInfo != NULL) {
|
||||||
|
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||||
|
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||||
|
VP8EncDspARGBInitMIPSdspR2();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
107
src/dsp/argb_mips_dsp_r2.c
Normal file
107
src/dsp/argb_mips_dsp_r2.c
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Use of this source code is governed by a BSD-style license
|
||||||
|
// that can be found in the COPYING file in the root of the source
|
||||||
|
// tree. An additional intellectual property rights grant can be found
|
||||||
|
// in the file PATENTS. All contributing project authors may
|
||||||
|
// be found in the AUTHORS file in the root of the source tree.
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// ARGB making functions (mips version).
|
||||||
|
//
|
||||||
|
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||||
|
|
||||||
|
#include "./dsp.h"
|
||||||
|
|
||||||
|
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||||
|
|
||||||
|
// MIPS DSP R2 version of the ARGB row packer, written as a single inline
// assembly block. For each of the 'len' pixels it loads one byte from each of
// a, r, g and b at a shared byte offset that grows by 'step' per pixel, packs
// the four bytes into one 32-bit word and stores that word to 'out'.
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
|
||||||
|
const uint8_t* b, int len, int step, uint32_t* out) {
|
||||||
|
// Scratch registers used only inside the asm block.
int temp0, temp1, temp2, temp3, offset;
|
||||||
|
// 1 when 'len' is odd, 0 when it is even.
const int rest = len & 1;
|
||||||
|
// Address at which the loop at label 2 stops. When 'len' is odd the last
// pixel is excluded here and handled by the tail code after label 0.
const uint32_t* const loop_end = out + len - rest;
|
||||||
|
__asm__ volatile (
|
||||||
|
// offset = 0
"xor %[offset], %[offset], %[offset] \n\t"
|
||||||
|
// Skip the loop entirely when out already equals loop_end.
"beq %[loop_end], %[out], 0f \n\t"
|
||||||
|
// Main loop: one pixel per iteration.
"2: \n\t"
|
||||||
|
// Load the four source bytes at the current offset (lbux: unsigned byte,
// indexed addressing).
"lbux %[temp0], %[offset](%[a]) \n\t"
|
||||||
|
"lbux %[temp1], %[offset](%[r]) \n\t"
|
||||||
|
"lbux %[temp2], %[offset](%[g]) \n\t"
|
||||||
|
"lbux %[temp3], %[offset](%[b]) \n\t"
|
||||||
|
// temp1 = (a << 16) | r
"ins %[temp1], %[temp0], 16, 16 \n\t"
|
||||||
|
// temp3 = (g << 16) | b
"ins %[temp3], %[temp2], 16, 16 \n\t"
|
||||||
|
// Advance the output pointer now; the store below compensates with -4.
"addiu %[out], %[out], 4 \n\t"
|
||||||
|
// Keep the low byte of each 16-bit half: temp0 = a:r:g:b, most significant
// byte first.
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
|
||||||
|
"sw %[temp0], -4(%[out]) \n\t"
|
||||||
|
// offset += step
"addu %[offset], %[offset], %[step] \n\t"
|
||||||
|
// Repeat until out reaches loop_end.
"bne %[loop_end], %[out], 2b \n\t"
|
||||||
|
"0: \n\t"
|
||||||
|
// Tail: finished unless 'len' was odd.
"beq %[rest], $zero, 1f \n\t"
|
||||||
|
// Same load/pack sequence for the single remaining pixel.
"lbux %[temp0], %[offset](%[a]) \n\t"
|
||||||
|
"lbux %[temp1], %[offset](%[r]) \n\t"
|
||||||
|
"lbux %[temp2], %[offset](%[g]) \n\t"
|
||||||
|
"lbux %[temp3], %[offset](%[b]) \n\t"
|
||||||
|
"ins %[temp1], %[temp0], 16, 16 \n\t"
|
||||||
|
"ins %[temp3], %[temp2], 16, 16 \n\t"
|
||||||
|
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
|
||||||
|
// 'out' was not advanced in the tail, so the store uses offset 0.
"sw %[temp0], 0(%[out]) \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
// Outputs: temp0..temp3 and offset are early-clobber scratch registers;
// 'out' is both read and written.
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
||||||
|
[temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
|
||||||
|
// Inputs: the four plane pointers, the byte stride, the loop bound and the
// odd-length flag.
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
|
||||||
|
[loop_end]"r"(loop_end), [rest]"r"(rest)
|
||||||
|
// The asm stores through 'out', which is not listed as a memory operand.
: "memory"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// MIPS DSP R2 version of the RGB row packer, written as a single inline
// assembly block. For each of the 'len' pixels it loads one byte from each of
// r, g and b at a shared byte offset that grows by 'step' per pixel, combines
// them with a constant alpha of 0xff and stores one 32-bit word to 'out'.
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||||
|
int len, int step, uint32_t* out) {
|
||||||
|
// Scratch registers used only inside the asm block.
int temp0, temp1, temp2, offset;
|
||||||
|
// 1 when 'len' is odd, 0 when it is even.
const int rest = len & 1;
|
||||||
|
// Constant alpha value inserted into every output pixel.
const int a = 0xff;
|
||||||
|
// Address at which the loop at label 2 stops. When 'len' is odd the last
// pixel is excluded here and handled by the tail code after label 0.
const uint32_t* const loop_end = out + len - rest;
|
||||||
|
__asm__ volatile (
|
||||||
|
// offset = 0
"xor %[offset], %[offset], %[offset] \n\t"
|
||||||
|
// Skip the loop entirely when out already equals loop_end.
"beq %[loop_end], %[out], 0f \n\t"
|
||||||
|
// Main loop: one pixel per iteration.
"2: \n\t"
|
||||||
|
// Load the three source bytes at the current offset (lbux: unsigned byte,
// indexed addressing).
"lbux %[temp0], %[offset](%[r]) \n\t"
|
||||||
|
"lbux %[temp1], %[offset](%[g]) \n\t"
|
||||||
|
"lbux %[temp2], %[offset](%[b]) \n\t"
|
||||||
|
// temp0 = (0xff << 16) | r
"ins %[temp0], %[a], 16, 16 \n\t"
|
||||||
|
// temp2 = (g << 16) | b
"ins %[temp2], %[temp1], 16, 16 \n\t"
|
||||||
|
// Advance the output pointer now; the store below compensates with -4.
"addiu %[out], %[out], 4 \n\t"
|
||||||
|
// Keep the low byte of each 16-bit half: temp0 = 0xff:r:g:b, most
// significant byte first.
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
|
||||||
|
"sw %[temp0], -4(%[out]) \n\t"
|
||||||
|
// offset += step
"addu %[offset], %[offset], %[step] \n\t"
|
||||||
|
// Repeat until out reaches loop_end.
"bne %[loop_end], %[out], 2b \n\t"
|
||||||
|
"0: \n\t"
|
||||||
|
// Tail: finished unless 'len' was odd.
"beq %[rest], $zero, 1f \n\t"
|
||||||
|
// Same load/pack sequence for the single remaining pixel.
"lbux %[temp0], %[offset](%[r]) \n\t"
|
||||||
|
"lbux %[temp1], %[offset](%[g]) \n\t"
|
||||||
|
"lbux %[temp2], %[offset](%[b]) \n\t"
|
||||||
|
"ins %[temp0], %[a], 16, 16 \n\t"
|
||||||
|
"ins %[temp2], %[temp1], 16, 16 \n\t"
|
||||||
|
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
|
||||||
|
// 'out' was not advanced in the tail, so the store uses offset 0.
"sw %[temp0], 0(%[out]) \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
// Outputs: temp0..temp2 and offset are early-clobber scratch registers;
// 'out' is both read and written.
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
||||||
|
[offset]"=&r"(offset), [out]"+&r"(out)
|
||||||
|
// Inputs: the constant alpha, the three plane pointers, the byte stride,
// the loop bound and the odd-length flag.
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
|
||||||
|
[loop_end]"r"(loop_end), [rest]"r"(rest)
|
||||||
|
// The asm stores through 'out', which is not listed as a memory operand.
: "memory"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // WEBP_USE_MIPS_DSP_R2
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// Entry point
|
||||||
|
|
||||||
|
extern WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void);
|
||||||
|
|
||||||
|
// Points VP8PackARGB / VP8PackRGB at the MIPS DSP R2 implementations from this
// file. When WEBP_USE_MIPS_DSP_R2 is not defined the body is empty and the
// pointers are left untouched.
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) {
#if defined(WEBP_USE_MIPS_DSP_R2)
  VP8PackRGB = PackRGB;
  VP8PackARGB = PackARGB;
#endif  // WEBP_USE_MIPS_DSP_R2
}
|
@ -337,6 +337,16 @@ void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse);
|
|||||||
// To be called first before using the above.
|
// To be called first before using the above.
|
||||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void);
|
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void);
|
||||||
|
|
||||||
|
// ARGB making functions: each call packs one row of 'len' pixels into 'out',
// one 32-bit value per pixel, built as (a << 24) | (r << 16) | (g << 8) | b.
// Each plane pointer addresses the first sample of the row; consecutive
// samples of a plane are 'step' bytes apart.
|
||||||
|
extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r,
|
||||||
|
const uint8_t* g, const uint8_t* b, int len,
|
||||||
|
int step, uint32_t* out);
|
||||||
|
// Same as VP8PackARGB but without an alpha plane: alpha is set to 0xff.
extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
|
||||||
|
int len, int step, uint32_t* out);
|
||||||
|
|
||||||
|
// To be called first before using the above: it assigns both function
// pointers.
|
||||||
|
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
#endif
|
#endif
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "./vp8enci.h"
|
#include "./vp8enci.h"
|
||||||
|
#include "../dsp/dsp.h"
|
||||||
#include "../utils/utils.h"
|
#include "../utils/utils.h"
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -38,6 +39,7 @@ int WebPPictureInitInternal(WebPPicture* picture, int version) {
|
|||||||
memset(picture, 0, sizeof(*picture));
|
memset(picture, 0, sizeof(*picture));
|
||||||
picture->writer = DummyWriter;
|
picture->writer = DummyWriter;
|
||||||
WebPEncodingSetError(picture, VP8_ENC_OK);
|
WebPEncodingSetError(picture, VP8_ENC_OK);
|
||||||
|
VP8EncDspARGBInit();
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -32,10 +32,6 @@ static const union {
|
|||||||
} test_endian = { 0xff000000u };
|
} test_endian = { 0xff000000u };
|
||||||
#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
|
#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
|
||||||
|
|
||||||
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
|
|
||||||
return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Detection of non-trivial transparency
|
// Detection of non-trivial transparency
|
||||||
|
|
||||||
@ -1065,13 +1061,19 @@ static int Import(WebPPicture* const picture,
|
|||||||
if (!WebPPictureAlloc(picture)) return 0;
|
if (!WebPPictureAlloc(picture)) return 0;
|
||||||
|
|
||||||
assert(step >= (import_alpha ? 4 : 3));
|
assert(step >= (import_alpha ? 4 : 3));
|
||||||
|
if (import_alpha) {
|
||||||
for (y = 0; y < height; ++y) {
|
for (y = 0; y < height; ++y) {
|
||||||
uint32_t* const dst = &picture->argb[y * picture->argb_stride];
|
uint32_t* const dst = &picture->argb[y * picture->argb_stride];
|
||||||
int x;
|
const int offset = y * rgb_stride;
|
||||||
for (x = 0; x < width; ++x) {
|
VP8PackARGB(a_ptr + offset, r_ptr + offset, g_ptr + offset,
|
||||||
const int offset = step * x + y * rgb_stride;
|
b_ptr + offset, width, step, dst);
|
||||||
dst[x] = MakeARGB32(import_alpha ? a_ptr[offset] : 0xff,
|
}
|
||||||
r_ptr[offset], g_ptr[offset], b_ptr[offset]);
|
} else {
|
||||||
|
for (y = 0; y < height; ++y) {
|
||||||
|
uint32_t* const dst = &picture->argb[y * picture->argb_stride];
|
||||||
|
const int offset = y * rgb_stride;
|
||||||
|
VP8PackRGB(r_ptr + offset, g_ptr + offset, b_ptr + offset,
|
||||||
|
width, step, dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user