MIPS: MIPS32r1: Added optimizations for FastLog2
Adds MIPS32 versions of the functions VP8LFastLog2Slow and VP8LFastSLog2Slow. Also: replaced some "% y" by "& (y-1)" in the C version (valid since y is a power of two). Change-Id: I875170384e3c333812ca42d6ce7278aecabd60f0
This commit is contained in:
parent
3fe0291530
commit
baabf1ea3a
@ -33,6 +33,7 @@ LOCAL_SRC_FILES := \
|
|||||||
src/dsp/enc_mips32.c \
|
src/dsp/enc_mips32.c \
|
||||||
src/dsp/enc_sse2.c \
|
src/dsp/enc_sse2.c \
|
||||||
src/dsp/lossless.c \
|
src/dsp/lossless.c \
|
||||||
|
src/dsp/lossless_mips32.c \
|
||||||
src/dsp/lossless_sse2.c \
|
src/dsp/lossless_sse2.c \
|
||||||
src/dsp/upsampling.c \
|
src/dsp/upsampling.c \
|
||||||
src/dsp/upsampling_mips32.c \
|
src/dsp/upsampling_mips32.c \
|
||||||
|
@ -174,6 +174,7 @@ DSP_DEC_OBJS = \
|
|||||||
$(DIROBJ)\dsp\dec_neon.obj \
|
$(DIROBJ)\dsp\dec_neon.obj \
|
||||||
$(DIROBJ)\dsp\dec_sse2.obj \
|
$(DIROBJ)\dsp\dec_sse2.obj \
|
||||||
$(DIROBJ)\dsp\lossless.obj \
|
$(DIROBJ)\dsp\lossless.obj \
|
||||||
|
$(DIROBJ)\dsp\lossless_mips32.obj \
|
||||||
$(DIROBJ)\dsp\lossless_neon.obj \
|
$(DIROBJ)\dsp\lossless_neon.obj \
|
||||||
$(DIROBJ)\dsp\lossless_sse2.obj \
|
$(DIROBJ)\dsp\lossless_sse2.obj \
|
||||||
$(DIROBJ)\dsp\upsampling.obj \
|
$(DIROBJ)\dsp\upsampling.obj \
|
||||||
|
@ -109,6 +109,7 @@ DSP_DEC_OBJS = \
|
|||||||
src/dsp/dec_neon.o \
|
src/dsp/dec_neon.o \
|
||||||
src/dsp/dec_sse2.o \
|
src/dsp/dec_sse2.o \
|
||||||
src/dsp/lossless.o \
|
src/dsp/lossless.o \
|
||||||
|
src/dsp/lossless_mips32.o \
|
||||||
src/dsp/lossless_neon.o \
|
src/dsp/lossless_neon.o \
|
||||||
src/dsp/lossless_sse2.o \
|
src/dsp/lossless_sse2.o \
|
||||||
src/dsp/upsampling.o \
|
src/dsp/upsampling.o \
|
||||||
|
@ -17,6 +17,7 @@ COMMON_SOURCES += dec_neon.c
|
|||||||
COMMON_SOURCES += dec_sse2.c
|
COMMON_SOURCES += dec_sse2.c
|
||||||
COMMON_SOURCES += dsp.h
|
COMMON_SOURCES += dsp.h
|
||||||
COMMON_SOURCES += lossless.c
|
COMMON_SOURCES += lossless.c
|
||||||
|
COMMON_SOURCES += lossless_mips32.c
|
||||||
COMMON_SOURCES += lossless_neon.c
|
COMMON_SOURCES += lossless_neon.c
|
||||||
COMMON_SOURCES += lossless_sse2.c
|
COMMON_SOURCES += lossless_sse2.c
|
||||||
COMMON_SOURCES += lossless.h
|
COMMON_SOURCES += lossless.h
|
||||||
|
@ -332,7 +332,7 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
|
|||||||
#define APPROX_LOG_WITH_CORRECTION_MAX 65536
|
#define APPROX_LOG_WITH_CORRECTION_MAX 65536
|
||||||
#define APPROX_LOG_MAX 4096
|
#define APPROX_LOG_MAX 4096
|
||||||
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
|
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
|
||||||
float VP8LFastSLog2Slow(int v) {
|
static float FastSLog2Slow(int v) {
|
||||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||||
int log_cnt = 0;
|
int log_cnt = 0;
|
||||||
@ -351,14 +351,14 @@ float VP8LFastSLog2Slow(int v) {
|
|||||||
// The correction factor: log(1 + d) ~ d; for very small d values, so
|
// The correction factor: log(1 + d) ~ d; for very small d values, so
|
||||||
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
|
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
|
||||||
// LOG_2_RECIPROCAL ~ 23/16
|
// LOG_2_RECIPROCAL ~ 23/16
|
||||||
correction = (23 * (orig_v % y)) >> 4;
|
correction = (23 * (orig_v & (y - 1))) >> 4;
|
||||||
return v_f * (kLog2Table[v] + log_cnt) + correction;
|
return v_f * (kLog2Table[v] + log_cnt) + correction;
|
||||||
} else {
|
} else {
|
||||||
return (float)(LOG_2_RECIPROCAL * v * log((double)v));
|
return (float)(LOG_2_RECIPROCAL * v * log((double)v));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
float VP8LFastLog2Slow(int v) {
|
static float FastLog2Slow(int v) {
|
||||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||||
int log_cnt = 0;
|
int log_cnt = 0;
|
||||||
@ -374,7 +374,7 @@ float VP8LFastLog2Slow(int v) {
|
|||||||
if (orig_v >= APPROX_LOG_MAX) {
|
if (orig_v >= APPROX_LOG_MAX) {
|
||||||
// Since the division is still expensive, add this correction factor only
|
// Since the division is still expensive, add this correction factor only
|
||||||
// for large values of 'v'.
|
// for large values of 'v'.
|
||||||
const int correction = (23 * (orig_v % y)) >> 4;
|
const int correction = (23 * (orig_v & (y - 1))) >> 4;
|
||||||
log_2 += (double)correction / orig_v;
|
log_2 += (double)correction / orig_v;
|
||||||
}
|
}
|
||||||
return (float)log_2;
|
return (float)log_2;
|
||||||
@ -1473,8 +1473,12 @@ VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
|
|||||||
VP8LConvertFunc VP8LConvertBGRAToRGB565;
|
VP8LConvertFunc VP8LConvertBGRAToRGB565;
|
||||||
VP8LConvertFunc VP8LConvertBGRAToBGR;
|
VP8LConvertFunc VP8LConvertBGRAToBGR;
|
||||||
|
|
||||||
|
VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
||||||
|
VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
||||||
|
|
||||||
extern void VP8LDspInitSSE2(void);
|
extern void VP8LDspInitSSE2(void);
|
||||||
extern void VP8LDspInitNEON(void);
|
extern void VP8LDspInitNEON(void);
|
||||||
|
extern void VP8LDspInitMIPS32(void);
|
||||||
|
|
||||||
void VP8LDspInit(void) {
|
void VP8LDspInit(void) {
|
||||||
memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors));
|
memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors));
|
||||||
@ -1491,6 +1495,9 @@ void VP8LDspInit(void) {
|
|||||||
VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
|
VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
|
||||||
VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
|
VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
|
||||||
|
|
||||||
|
VP8LFastLog2Slow = FastLog2Slow;
|
||||||
|
VP8LFastSLog2Slow = FastSLog2Slow;
|
||||||
|
|
||||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||||
if (VP8GetCPUInfo != NULL) {
|
if (VP8GetCPUInfo != NULL) {
|
||||||
#if defined(WEBP_USE_SSE2)
|
#if defined(WEBP_USE_SSE2)
|
||||||
@ -1502,6 +1509,11 @@ void VP8LDspInit(void) {
|
|||||||
if (VP8GetCPUInfo(kNEON)) {
|
if (VP8GetCPUInfo(kNEON)) {
|
||||||
VP8LDspInitNEON();
|
VP8LDspInitNEON();
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(WEBP_USE_MIPS32)
|
||||||
|
if (VP8GetCPUInfo(kMIPS32)) {
|
||||||
|
VP8LDspInitMIPS32();
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -122,8 +122,11 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
|
|||||||
#define LOG_LOOKUP_IDX_MAX 256
|
#define LOG_LOOKUP_IDX_MAX 256
|
||||||
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
|
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||||
extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
|
extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||||
float VP8LFastLog2Slow(int v);
|
typedef float (*VP8LFastLog2SlowFunc)(int v);
|
||||||
float VP8LFastSLog2Slow(int v);
|
|
||||||
|
extern VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
||||||
|
extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
||||||
|
|
||||||
static WEBP_INLINE float VP8LFastLog2(int v) {
|
static WEBP_INLINE float VP8LFastLog2(int v) {
|
||||||
return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
|
return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
|
||||||
}
|
}
|
||||||
|
108
src/dsp/lossless_mips32.c
Normal file
108
src/dsp/lossless_mips32.c
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Use of this source code is governed by a BSD-style license
|
||||||
|
// that can be found in the COPYING file in the root of the source
|
||||||
|
// tree. An additional intellectual property rights grant can be found
|
||||||
|
// in the file PATENTS. All contributing project authors may
|
||||||
|
// be found in the AUTHORS file in the root of the source tree.
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// MIPS version of lossless functions
|
||||||
|
//
|
||||||
|
// Author(s): Jovan Zelincevic (jovan.zelincevic@imgtec.com)
|
||||||
|
|
||||||
|
#include "./dsp.h"
|
||||||
|
#include "./lossless.h"
|
||||||
|
|
||||||
|
#if defined(WEBP_USE_MIPS32)
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#define APPROX_LOG_WITH_CORRECTION_MAX 65536
|
||||||
|
#define APPROX_LOG_MAX 4096
|
||||||
|
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
|
||||||
|
|
||||||
|
static float FastSLog2SlowMIPS32(int v) {
|
||||||
|
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||||
|
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||||
|
int log_cnt, y, correction;
|
||||||
|
const int c24 = 24;
|
||||||
|
const float v_f = (float)v;
|
||||||
|
int temp;
|
||||||
|
|
||||||
|
// Xf = 256 = 2^8
|
||||||
|
// log_cnt is index of leading one in upper 24 bits
|
||||||
|
__asm__ volatile(
|
||||||
|
"clz %[log_cnt], %[v] \n\t"
|
||||||
|
"addiu %[y], $zero, 1 \n\t"
|
||||||
|
"subu %[log_cnt], %[c24], %[log_cnt] \n\t"
|
||||||
|
"sllv %[y], %[y], %[log_cnt] \n\t"
|
||||||
|
"srlv %[temp], %[v], %[log_cnt] \n\t"
|
||||||
|
: [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),
|
||||||
|
[temp]"=r"(temp)
|
||||||
|
: [c24]"r"(c24), [v]"r"(v)
|
||||||
|
);
|
||||||
|
|
||||||
|
// vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
|
||||||
|
// Xf = floor(Xf) * (1 + (v % y) / v)
|
||||||
|
// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
|
||||||
|
// The correction factor: log(1 + d) ~ d; for very small d values, so
|
||||||
|
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
|
||||||
|
// LOG_2_RECIPROCAL ~ 23/16
|
||||||
|
|
||||||
|
// (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)
|
||||||
|
correction = (23 * (v & (y - 1))) >> 4;
|
||||||
|
return v_f * (kLog2Table[temp] + log_cnt) + correction;
|
||||||
|
} else {
|
||||||
|
return (float)(LOG_2_RECIPROCAL * v * log((double)v));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static float FastLog2SlowMIPS32(int v) {
|
||||||
|
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||||
|
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||||
|
int log_cnt, y;
|
||||||
|
const int c24 = 24;
|
||||||
|
double log_2;
|
||||||
|
int temp;
|
||||||
|
|
||||||
|
__asm__ volatile(
|
||||||
|
"clz %[log_cnt], %[v] \n\t"
|
||||||
|
"addiu %[y], $zero, 1 \n\t"
|
||||||
|
"subu %[log_cnt], %[c24], %[log_cnt] \n\t"
|
||||||
|
"sllv %[y], %[y], %[log_cnt] \n\t"
|
||||||
|
"srlv %[temp], %[v], %[log_cnt] \n\t"
|
||||||
|
: [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),
|
||||||
|
[temp]"=r"(temp)
|
||||||
|
: [c24]"r"(c24), [v]"r"(v)
|
||||||
|
);
|
||||||
|
|
||||||
|
log_2 = kLog2Table[temp] + log_cnt;
|
||||||
|
if (v >= APPROX_LOG_MAX) {
|
||||||
|
// Since the division is still expensive, add this correction factor only
|
||||||
|
// for large values of 'v'.
|
||||||
|
|
||||||
|
const int correction = (23 * (v & (y - 1))) >> 4;
|
||||||
|
log_2 += (double)correction / v;
|
||||||
|
}
|
||||||
|
return (float)log_2;
|
||||||
|
} else {
|
||||||
|
return (float)(LOG_2_RECIPROCAL * log((double)v));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // WEBP_USE_MIPS32
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// Entry point
|
||||||
|
|
||||||
|
extern void VP8LDspInitMIPS32(void);
|
||||||
|
|
||||||
|
void VP8LDspInitMIPS32(void) {
|
||||||
|
#if defined(WEBP_USE_MIPS32)
|
||||||
|
VP8LFastSLog2Slow = FastSLog2SlowMIPS32;
|
||||||
|
VP8LFastLog2Slow = FastLog2SlowMIPS32;
|
||||||
|
#endif // WEBP_USE_MIPS32
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user