Remove memcpy in lossless decoding.
Change-Id: Iba694b306486d67764e2fc5576c98a974c9b886c
This commit is contained in:
parent
7474d46e45
commit
71e2f5cadf
@ -712,13 +712,15 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
|
||||
uint32_t* const rows_out = dec->argb_cache_;
|
||||
|
||||
// Inverse transforms.
|
||||
// TODO: most transforms only need to operate on the cropped region only.
|
||||
memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out));
|
||||
while (n-- > 0) {
|
||||
VP8LTransform* const transform = &dec->transforms_[n];
|
||||
VP8LInverseTransform(transform, start_row, end_row, rows_in, rows_out);
|
||||
rows_in = rows_out;
|
||||
}
|
||||
if (rows_in != rows_out) {
|
||||
// No transform called, hence just copy.
|
||||
memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out));
|
||||
}
|
||||
}
|
||||
|
||||
// Processes (transforms, scales & color-converts) the rows decoded after the
|
||||
|
@ -234,15 +234,16 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
|
||||
|
||||
// Add green to blue and red channels (i.e. perform the inverse transform of
|
||||
// 'subtract green').
|
||||
// Plain-C fallback. For each of the 'num_pixels' 32-bit pixels read from
// 'src', adds the green byte (bits 8..15) to the red byte (bits 16..23) and to
// the blue byte (bits 0..7), each modulo 256, and stores the result in 'dst'.
// Bits 24..31 and the green byte are copied through unchanged.
// 'src' and 'dst' may point to the same buffer: each pixel is fully read
// before its slot is written. Nothing is read or written if num_pixels <= 0.
void VP8LAddGreenToBlueAndRed_C(const uint32_t* const src, int num_pixels,
                                uint32_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t argb = src[i];
    const uint32_t green = ((argb >> 8) & 0xff);
    // Red and blue sit in separate 16-bit lanes, so both additions can be done
    // with a single 32-bit add.
    uint32_t red_blue = (argb & 0x00ff00ffu);
    red_blue += (green << 16) | green;
    // Drop the carry bits (bit 8 and bit 24) so each sum wraps modulo 256.
    red_blue &= 0x00ff00ffu;
    dst[i] = (argb & 0xff00ff00u) | red_blue;
  }
}
|
||||
|
||||
@ -258,11 +259,12 @@ static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
|
||||
m->red_to_blue_ = (color_code >> 16) & 0xff;
|
||||
}
|
||||
|
||||
void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
|
||||
int num_pixels) {
|
||||
void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
|
||||
const uint32_t* const src, int num_pixels,
|
||||
uint32_t* const dst) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const uint32_t argb = data[i];
|
||||
const uint32_t argb = src[i];
|
||||
const uint32_t green = argb >> 8;
|
||||
const uint32_t red = argb >> 16;
|
||||
int new_red = red;
|
||||
@ -272,13 +274,14 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
|
||||
new_blue += ColorTransformDelta(m->green_to_blue_, green);
|
||||
new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
|
||||
new_blue &= 0xff;
|
||||
data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
|
||||
dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
|
||||
}
|
||||
}
|
||||
|
||||
// Color space inverse transform.
|
||||
static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
|
||||
int y_start, int y_end, uint32_t* data) {
|
||||
int y_start, int y_end,
|
||||
const uint32_t* src, uint32_t* dst) {
|
||||
const int width = transform->xsize_;
|
||||
const int tile_width = 1 << transform->bits_;
|
||||
const int mask = tile_width - 1;
|
||||
@ -292,17 +295,19 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
|
||||
while (y < y_end) {
|
||||
const uint32_t* pred = pred_row;
|
||||
VP8LMultipliers m = { 0, 0, 0 };
|
||||
const uint32_t* const data_safe_end = data + safe_width;
|
||||
const uint32_t* const data_end = data + width;
|
||||
while (data < data_safe_end) {
|
||||
const uint32_t* const src_safe_end = src + safe_width;
|
||||
const uint32_t* const src_end = src + width;
|
||||
while (src < src_safe_end) {
|
||||
ColorCodeToMultipliers(*pred++, &m);
|
||||
VP8LTransformColorInverse(&m, data, tile_width);
|
||||
data += tile_width;
|
||||
VP8LTransformColorInverse(&m, src, tile_width, dst);
|
||||
src += tile_width;
|
||||
dst += tile_width;
|
||||
}
|
||||
if (data < data_end) { // Left-overs using C-version.
|
||||
if (src < src_end) { // Left-overs using C-version.
|
||||
ColorCodeToMultipliers(*pred++, &m);
|
||||
VP8LTransformColorInverse(&m, data, remaining_width);
|
||||
data += remaining_width;
|
||||
VP8LTransformColorInverse(&m, src, remaining_width, dst);
|
||||
src += remaining_width;
|
||||
dst += remaining_width;
|
||||
}
|
||||
++y;
|
||||
if ((y & mask) == 0) pred_row += tiles_per_row;
|
||||
@ -367,9 +372,13 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
|
||||
assert(row_end <= transform->ysize_);
|
||||
switch (transform->type_) {
|
||||
case SUBTRACT_GREEN:
|
||||
VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
|
||||
VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
|
||||
break;
|
||||
case PREDICTOR_TRANSFORM:
|
||||
// TODO(vrabaud): parallelize transform predictors.
|
||||
if (in != out) {
|
||||
memcpy(out, in, (row_end - row_start) * width * sizeof(*out));
|
||||
}
|
||||
PredictorInverseTransform(transform, row_start, row_end, out);
|
||||
if (row_end != transform->ysize_) {
|
||||
// The last predicted row in this iteration will be the top-pred row
|
||||
@ -379,7 +388,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
|
||||
}
|
||||
break;
|
||||
case CROSS_COLOR_TRANSFORM:
|
||||
ColorSpaceInverseTransform(transform, row_start, row_end, out);
|
||||
ColorSpaceInverseTransform(transform, row_start, row_end, in, out);
|
||||
break;
|
||||
case COLOR_INDEXING_TRANSFORM:
|
||||
if (in == out && transform->bits_ > 0) {
|
||||
@ -556,10 +565,10 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
|
||||
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
|
||||
VP8LPredictorFunc VP8LPredictors[16];
|
||||
|
||||
VP8LTransformColorFunc VP8LTransformColorInverse;
|
||||
VP8LTransformColorInverseFunc VP8LTransformColorInverse;
|
||||
|
||||
VP8LConvertFunc VP8LConvertBGRAToRGB;
|
||||
VP8LConvertFunc VP8LConvertBGRAToRGBA;
|
||||
|
@ -35,8 +35,9 @@ extern "C" {
|
||||
typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
|
||||
extern VP8LPredictorFunc VP8LPredictors[16];
|
||||
|
||||
typedef void (*VP8LProcessBlueAndRedFunc)(uint32_t* argb_data, int num_pixels);
|
||||
extern VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
|
||||
typedef void (*VP8LProcessDecBlueAndRedFunc)(const uint32_t* src,
|
||||
int num_pixels, uint32_t* dst);
|
||||
extern VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
|
||||
|
||||
typedef struct {
|
||||
// Note: the members are uint8_t, so that any negative values are
|
||||
@ -45,9 +46,10 @@ typedef struct {
|
||||
uint8_t green_to_blue_;
|
||||
uint8_t red_to_blue_;
|
||||
} VP8LMultipliers;
|
||||
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data, int num_pixels);
|
||||
extern VP8LTransformColorFunc VP8LTransformColorInverse;
|
||||
typedef void (*VP8LTransformColorInverseFunc)(const VP8LMultipliers* const m,
|
||||
const uint32_t* src,
|
||||
int num_pixels, uint32_t* dst);
|
||||
extern VP8LTransformColorInverseFunc VP8LTransformColorInverse;
|
||||
|
||||
struct VP8LTransform; // Defined in dec/vp8li.h.
|
||||
|
||||
@ -93,7 +95,8 @@ void VP8LColorIndexInverseTransformAlpha(
|
||||
|
||||
// Expose some C-only fallback functions
|
||||
void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
|
||||
uint32_t* data, int num_pixels);
|
||||
const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst);
|
||||
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
@ -102,7 +105,8 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels);
|
||||
void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst);
|
||||
|
||||
// Must be called before calling any of the above methods.
|
||||
void VP8LDspInit(void);
|
||||
@ -110,7 +114,10 @@ void VP8LDspInit(void);
|
||||
//------------------------------------------------------------------------------
|
||||
// Encoding
|
||||
|
||||
extern VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
|
||||
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
|
||||
uint32_t* const dst, int num_pixels);
|
||||
extern VP8LTransformColorFunc VP8LTransformColor;
|
||||
typedef void (*VP8LCollectColorBlueTransformsFunc)(
|
||||
const uint32_t* argb, int stride,
|
||||
|
@ -665,7 +665,7 @@ static void HistogramAdd(const VP8LHistogram* const a,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
|
||||
VP8LTransformColorFunc VP8LTransformColor;
|
||||
|
||||
|
@ -228,25 +228,27 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
|
||||
|
||||
// Add green to blue and red channels (i.e. perform the inverse transform of
|
||||
// 'subtract green').
|
||||
static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
|
||||
static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
uint32_t* const p_loop1_end = data + (num_pixels & ~3);
|
||||
uint32_t* const p_loop2_end = data + num_pixels;
|
||||
const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
|
||||
const uint32_t* const p_loop2_end = src + num_pixels;
|
||||
__asm__ volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"beq %[data], %[p_loop1_end], 3f \n\t"
|
||||
"beq %[src], %[p_loop1_end], 3f \n\t"
|
||||
" nop \n\t"
|
||||
"0: \n\t"
|
||||
"lw %[temp0], 0(%[data]) \n\t"
|
||||
"lw %[temp1], 4(%[data]) \n\t"
|
||||
"lw %[temp2], 8(%[data]) \n\t"
|
||||
"lw %[temp3], 12(%[data]) \n\t"
|
||||
"lw %[temp0], 0(%[src]) \n\t"
|
||||
"lw %[temp1], 4(%[src]) \n\t"
|
||||
"lw %[temp2], 8(%[src]) \n\t"
|
||||
"lw %[temp3], 12(%[src]) \n\t"
|
||||
"ext %[temp4], %[temp0], 8, 8 \n\t"
|
||||
"ext %[temp5], %[temp1], 8, 8 \n\t"
|
||||
"ext %[temp6], %[temp2], 8, 8 \n\t"
|
||||
"ext %[temp7], %[temp3], 8, 8 \n\t"
|
||||
"addiu %[data], %[data], 16 \n\t"
|
||||
"addiu %[src], %[src], 16 \n\t"
|
||||
"addiu %[dst], %[dst], 16 \n\t"
|
||||
"replv.ph %[temp4], %[temp4] \n\t"
|
||||
"replv.ph %[temp5], %[temp5] \n\t"
|
||||
"replv.ph %[temp6], %[temp6] \n\t"
|
||||
@ -255,44 +257,47 @@ static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
|
||||
"addu.qb %[temp1], %[temp1], %[temp5] \n\t"
|
||||
"addu.qb %[temp2], %[temp2], %[temp6] \n\t"
|
||||
"addu.qb %[temp3], %[temp3], %[temp7] \n\t"
|
||||
"sw %[temp0], -16(%[data]) \n\t"
|
||||
"sw %[temp1], -12(%[data]) \n\t"
|
||||
"sw %[temp2], -8(%[data]) \n\t"
|
||||
"bne %[data], %[p_loop1_end], 0b \n\t"
|
||||
" sw %[temp3], -4(%[data]) \n\t"
|
||||
"sw %[temp0], -16(%[dst]) \n\t"
|
||||
"sw %[temp1], -12(%[dst]) \n\t"
|
||||
"sw %[temp2], -8(%[dst]) \n\t"
|
||||
"bne %[src], %[p_loop1_end], 0b \n\t"
|
||||
" sw %[temp3], -4(%[dst]) \n\t"
|
||||
"3: \n\t"
|
||||
"beq %[data], %[p_loop2_end], 2f \n\t"
|
||||
"beq %[src], %[p_loop2_end], 2f \n\t"
|
||||
" nop \n\t"
|
||||
"1: \n\t"
|
||||
"lw %[temp0], 0(%[data]) \n\t"
|
||||
"addiu %[data], %[data], 4 \n\t"
|
||||
"lw %[temp0], 0(%[src]) \n\t"
|
||||
"addiu %[src], %[src], 4 \n\t"
|
||||
"addiu %[dst], %[dst], 4 \n\t"
|
||||
"ext %[temp4], %[temp0], 8, 8 \n\t"
|
||||
"replv.ph %[temp4], %[temp4] \n\t"
|
||||
"addu.qb %[temp0], %[temp0], %[temp4] \n\t"
|
||||
"bne %[data], %[p_loop2_end], 1b \n\t"
|
||||
" sw %[temp0], -4(%[data]) \n\t"
|
||||
"bne %[src], %[p_loop2_end], 1b \n\t"
|
||||
" sw %[temp0], -4(%[dst]) \n\t"
|
||||
"2: \n\t"
|
||||
".set pop \n\t"
|
||||
: [data]"+&r"(data), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
||||
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
|
||||
[temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
|
||||
: [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
|
||||
[temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
||||
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
|
||||
[temp7]"=&r"(temp7)
|
||||
: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
uint32_t* data, int num_pixels) {
|
||||
const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
uint32_t argb, argb1, new_red;
|
||||
const uint32_t G_to_R = m->green_to_red_;
|
||||
const uint32_t G_to_B = m->green_to_blue_;
|
||||
const uint32_t R_to_B = m->red_to_blue_;
|
||||
uint32_t* const p_loop_end = data + (num_pixels & ~1);
|
||||
const uint32_t* const p_loop_end = src + (num_pixels & ~1);
|
||||
__asm__ volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"beq %[data], %[p_loop_end], 1f \n\t"
|
||||
"beq %[src], %[p_loop_end], 1f \n\t"
|
||||
" nop \n\t"
|
||||
"replv.ph %[temp0], %[G_to_R] \n\t"
|
||||
"replv.ph %[temp1], %[G_to_B] \n\t"
|
||||
@ -304,9 +309,12 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
"shra.ph %[temp1], %[temp1], 8 \n\t"
|
||||
"shra.ph %[temp2], %[temp2], 8 \n\t"
|
||||
"0: \n\t"
|
||||
"lw %[argb], 0(%[data]) \n\t"
|
||||
"lw %[argb1], 4(%[data]) \n\t"
|
||||
"addiu %[data], %[data], 8 \n\t"
|
||||
"lw %[argb], 0(%[src]) \n\t"
|
||||
"lw %[argb1], 4(%[src]) \n\t"
|
||||
"sw %[argb], 0(%[dst]) \n\t"
|
||||
"sw %[argb1], 4(%[dst]) \n\t"
|
||||
"addiu %[src], %[src], 8 \n\t"
|
||||
"addiu %[dst], %[dst], 8 \n\t"
|
||||
"precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
|
||||
"preceu.ph.qbra %[temp3], %[temp3] \n\t"
|
||||
"shll.ph %[temp3], %[temp3], 8 \n\t"
|
||||
@ -323,29 +331,29 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
"shll.ph %[temp4], %[temp5], 8 \n\t"
|
||||
"shra.ph %[temp4], %[temp4], 8 \n\t"
|
||||
"mul.ph %[temp4], %[temp4], %[temp2] \n\t"
|
||||
"sb %[temp5], -2(%[data]) \n\t"
|
||||
"sb %[temp5], -2(%[dst]) \n\t"
|
||||
"sra %[temp5], %[temp5], 16 \n\t"
|
||||
"shra.ph %[temp4], %[temp4], 5 \n\t"
|
||||
"addu.ph %[argb1], %[argb1], %[temp4] \n\t"
|
||||
"preceu.ph.qbra %[temp3], %[argb1] \n\t"
|
||||
"sb %[temp5], -6(%[data]) \n\t"
|
||||
"sb %[temp3], -4(%[data]) \n\t"
|
||||
"sb %[temp5], -6(%[dst]) \n\t"
|
||||
"sb %[temp3], -4(%[dst]) \n\t"
|
||||
"sra %[temp3], %[temp3], 16 \n\t"
|
||||
"bne %[data], %[p_loop_end], 0b \n\t"
|
||||
" sb %[temp3], -8(%[data]) \n\t"
|
||||
"bne %[src], %[p_loop_end], 0b \n\t"
|
||||
" sb %[temp3], -8(%[dst]) \n\t"
|
||||
"1: \n\t"
|
||||
".set pop \n\t"
|
||||
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
||||
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
|
||||
[new_red]"=&r"(new_red), [argb]"=&r"(argb),
|
||||
[argb1]"=&r"(argb1), [data]"+&r"(data)
|
||||
[argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
|
||||
: [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
|
||||
[G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
|
||||
: "memory", "hi", "lo"
|
||||
);
|
||||
|
||||
// Fall-back to C-version for left-overs.
|
||||
if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1);
|
||||
if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGB(const uint32_t* src,
|
||||
|
@ -244,44 +244,51 @@ static void ConvertBGRAToRGB(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
|
||||
static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
int i;
|
||||
uint8_t* ptemp_data = (uint8_t*)data;
|
||||
const uint8_t* in = (const uint8_t*)src;
|
||||
uint8_t* out = (uint8_t*)dst;
|
||||
v16u8 src0, dst0, tmp0;
|
||||
const v16u8 mask = { 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255,
|
||||
13, 255, 13, 255 };
|
||||
|
||||
while (num_pixels >= 8) {
|
||||
v16u8 src1, dst1, tmp1;
|
||||
LD_UB2(ptemp_data, 16, src0, src1);
|
||||
LD_UB2(in, 16, src0, src1);
|
||||
VSHF_B2_UB(src0, src1, src1, src0, mask, mask, tmp0, tmp1);
|
||||
ADD2(src0, tmp0, src1, tmp1, dst0, dst1);
|
||||
ST_UB2(dst0, dst1, ptemp_data, 16);
|
||||
ptemp_data += 32;
|
||||
ST_UB2(dst0, dst1, out, 16);
|
||||
in += 32;
|
||||
out += 32;
|
||||
num_pixels -= 8;
|
||||
}
|
||||
if (num_pixels > 0) {
|
||||
if (num_pixels >= 4) {
|
||||
src0 = LD_UB(ptemp_data);
|
||||
src0 = LD_UB(in);
|
||||
tmp0 = VSHF_UB(src0, src0, mask);
|
||||
dst0 = src0 + tmp0;
|
||||
ST_UB(dst0, ptemp_data);
|
||||
ptemp_data += 16;
|
||||
ST_UB(dst0, out);
|
||||
in += 16;
|
||||
out += 16;
|
||||
num_pixels -= 4;
|
||||
}
|
||||
for (i = 0; i < num_pixels; i++) {
|
||||
const uint8_t b = ptemp_data[0];
|
||||
const uint8_t g = ptemp_data[1];
|
||||
const uint8_t r = ptemp_data[2];
|
||||
ptemp_data[0] = (b + g) & 0xff;
|
||||
ptemp_data[2] = (r + g) & 0xff;
|
||||
ptemp_data += 4;
|
||||
const uint8_t b = in[0];
|
||||
const uint8_t g = in[1];
|
||||
const uint8_t r = in[2];
|
||||
out[0] = (b + g) & 0xff;
|
||||
out[1] = g;
|
||||
out[2] = (r + g) & 0xff;
|
||||
out[4] = in[4];
|
||||
out += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
uint32_t* data, int num_pixels) {
|
||||
const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
v16u8 src0, dst0;
|
||||
const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
|
||||
(m->green_to_red_ << 16));
|
||||
@ -293,34 +300,36 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
|
||||
while (num_pixels >= 8) {
|
||||
v16u8 src1, dst1;
|
||||
LD_UB2(data, 4, src0, src1);
|
||||
LD_UB2(src, 4, src0, src1);
|
||||
TRANSFORM_COLOR_INVERSE_8(src0, src1, dst0, dst1, g2br, r2b, mask0, mask1);
|
||||
ST_UB2(dst0, dst1, data, 4);
|
||||
data += 8;
|
||||
ST_UB2(dst0, dst1, dst, 4);
|
||||
src += 8;
|
||||
dst += 8;
|
||||
num_pixels -= 8;
|
||||
}
|
||||
if (num_pixels > 0) {
|
||||
if (num_pixels >= 4) {
|
||||
src0 = LD_UB(data);
|
||||
src0 = LD_UB(src);
|
||||
TRANSFORM_COLOR_INVERSE_4(src0, dst0, g2br, r2b, mask0, mask1);
|
||||
ST_UB(dst0, data);
|
||||
data += 4;
|
||||
ST_UB(dst0, dst);
|
||||
src += 4;
|
||||
dst += 4;
|
||||
num_pixels -= 4;
|
||||
}
|
||||
if (num_pixels > 0) {
|
||||
src0 = LD_UB(data);
|
||||
src0 = LD_UB(src);
|
||||
TRANSFORM_COLOR_INVERSE_4(src0, dst0, g2br, r2b, mask0, mask1);
|
||||
if (num_pixels == 3) {
|
||||
const uint64_t pix_d = __msa_copy_s_d((v2i64)dst0, 0);
|
||||
const uint32_t pix_w = __msa_copy_s_w((v4i32)dst0, 2);
|
||||
SD(pix_d, data + 0);
|
||||
SW(pix_w, data + 2);
|
||||
SD(pix_d, dst + 0);
|
||||
SW(pix_w, dst + 2);
|
||||
} else if (num_pixels == 2) {
|
||||
const uint64_t pix_d = __msa_copy_s_d((v2i64)dst0, 0);
|
||||
SD(pix_d, data);
|
||||
SD(pix_d, dst);
|
||||
} else {
|
||||
const uint32_t pix_w = __msa_copy_s_w((v4i32)dst0, 0);
|
||||
SW(pix_w, data);
|
||||
SW(pix_w, dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -171,28 +171,30 @@ static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
|
||||
}
|
||||
#endif // USE_VTBLQ
|
||||
|
||||
static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
||||
const uint32_t* const end = argb_data + (num_pixels & ~3);
|
||||
static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~3);
|
||||
#ifdef USE_VTBLQ
|
||||
const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
|
||||
#else
|
||||
const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
|
||||
#endif
|
||||
for (; argb_data < end; argb_data += 4) {
|
||||
const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
|
||||
for (; src < end; src += 4, dst += 4) {
|
||||
const uint8x16_t argb = vld1q_u8((const uint8_t*)src);
|
||||
const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
|
||||
vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens));
|
||||
vst1q_u8((uint8_t*)dst, vaddq_u8(argb, greens));
|
||||
}
|
||||
// fallthrough and finish off with plain-C
|
||||
VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3);
|
||||
VP8LAddGreenToBlueAndRed_C(src, num_pixels & 3, dst);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
|
||||
static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data, int num_pixels) {
|
||||
// sign-extended multiplying constants, pre-shifted by 6.
|
||||
const uint32_t* const src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
// sign-extended multiplying constants, pre-shifted by 6.
|
||||
#define CST(X) (((int16_t)(m->X << 8)) >> 6)
|
||||
const int16_t rb[8] = {
|
||||
CST(green_to_blue_), CST(green_to_red_),
|
||||
@ -219,7 +221,7 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
const uint32x4_t mask_ag = vdupq_n_u32(0xff00ff00u);
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const uint8x16_t in = vld1q_u8((uint8_t*)(argb_data + i));
|
||||
const uint8x16_t in = vld1q_u8((const uint8_t*)(src + i));
|
||||
const uint32x4_t a0g0 = vandq_u32(vreinterpretq_u32_u8(in), mask_ag);
|
||||
// 0 g 0 g
|
||||
const uint8x16_t greens = DoGreenShuffle(in, shuffle);
|
||||
@ -240,10 +242,10 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
// 0 r' 0 b''
|
||||
const uint16x8_t G = vshrq_n_u16(vreinterpretq_u16_s8(F), 8);
|
||||
const uint32x4_t out = vorrq_u32(vreinterpretq_u32_u16(G), a0g0);
|
||||
vst1q_u32(argb_data + i, out);
|
||||
vst1q_u32(dst + i, out);
|
||||
}
|
||||
// Fall-back to C-version for left-overs.
|
||||
VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i);
|
||||
VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i);
|
||||
}
|
||||
|
||||
#undef USE_VTBLQ
|
||||
|
@ -157,26 +157,28 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Subtract-Green Transform
|
||||
|
||||
static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
||||
static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb
|
||||
const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb
|
||||
const __m128i A = _mm_srli_epi16(in, 8); // 0 a 0 g
|
||||
const __m128i B = _mm_shufflelo_epi16(A, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
const __m128i C = _mm_shufflehi_epi16(B, _MM_SHUFFLE(2, 2, 0, 0)); // 0g0g
|
||||
const __m128i out = _mm_add_epi8(in, C);
|
||||
_mm_storeu_si128((__m128i*)&argb_data[i], out);
|
||||
_mm_storeu_si128((__m128i*)&dst[i], out);
|
||||
}
|
||||
// fallthrough and finish off with plain-C
|
||||
VP8LAddGreenToBlueAndRed_C(argb_data + i, num_pixels - i);
|
||||
VP8LAddGreenToBlueAndRed_C(src + i, num_pixels - i, dst + i);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
|
||||
static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data, int num_pixels) {
|
||||
// sign-extended multiplying constants, pre-shifted by 5.
|
||||
const uint32_t* const src, int num_pixels,
|
||||
uint32_t* dst) {
|
||||
// sign-extended multiplying constants, pre-shifted by 5.
|
||||
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
|
||||
const __m128i mults_rb = _mm_set_epi16(
|
||||
CST(green_to_red_), CST(green_to_blue_),
|
||||
@ -190,7 +192,7 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb
|
||||
const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb
|
||||
const __m128i A = _mm_and_si128(in, mask_ag); // a 0 g 0
|
||||
const __m128i B = _mm_shufflelo_epi16(A, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
const __m128i C = _mm_shufflehi_epi16(B, _MM_SHUFFLE(2, 2, 0, 0)); // g0g0
|
||||
@ -202,10 +204,10 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
const __m128i I = _mm_add_epi8(H, F); // r' x b'' 0
|
||||
const __m128i J = _mm_srli_epi16(I, 8); // 0 r' 0 b''
|
||||
const __m128i out = _mm_or_si128(J, A);
|
||||
_mm_storeu_si128((__m128i*)&argb_data[i], out);
|
||||
_mm_storeu_si128((__m128i*)&dst[i], out);
|
||||
}
|
||||
// Fall-back to C-version for left-overs.
|
||||
VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i);
|
||||
VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
Loading…
Reference in New Issue
Block a user