MIPS: dspr2: added optimization for function TransformColorRed
Added a new C function, CollectColorRedTransforms, which calls TransformColorRed; it is invoked through a function pointer so that a platform-specific implementation can replace it.
Change-Id: Ia68d73bfcf1ca2cb443dc2825910946221f87835
This commit is contained in:
parent
2cb39180cc
commit
a7e7caa486
@ -1067,13 +1067,10 @@ static float PredictionCostCrossColor(const int accumulated[256],
|
||||
PredictionCostSpatial(counts, 3, kExpValue);
|
||||
}
|
||||
|
||||
static float GetPredictionCostCrossColorRed(
|
||||
static void CollectColorRedTransforms(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
|
||||
const int accumulated_red_histo[256], const uint32_t* const argb) {
|
||||
int xsize, int green_to_red, int* histo, const uint32_t* const argb) {
|
||||
int all_y;
|
||||
int histo[256] = { 0 };
|
||||
float cur_diff;
|
||||
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
|
||||
int ix = all_y * xsize + tile_x_offset;
|
||||
int all_x;
|
||||
@ -1081,6 +1078,19 @@ static float GetPredictionCostCrossColorRed(
|
||||
++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static float GetPredictionCostCrossColorRed(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
|
||||
const int accumulated_red_histo[256], const uint32_t* const argb) {
|
||||
int histo[256] = { 0 };
|
||||
float cur_diff;
|
||||
|
||||
VP8LCollectColorRedTransforms(tile_x_offset, tile_y_offset, all_x_max,
|
||||
all_y_max, xsize, green_to_red,
|
||||
histo, argb);
|
||||
|
||||
cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
|
||||
if ((uint8_t)green_to_red == prev_x.green_to_red_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
@ -1738,6 +1748,7 @@ VP8LConvertFunc VP8LConvertBGRAToRGB565;
|
||||
VP8LConvertFunc VP8LConvertBGRAToBGR;
|
||||
|
||||
VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
|
||||
VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
|
||||
|
||||
VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
||||
VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
||||
@ -1779,6 +1790,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
|
||||
VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
|
||||
|
||||
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
|
||||
VP8LCollectColorRedTransforms = CollectColorRedTransforms;
|
||||
|
||||
VP8LFastLog2Slow = FastLog2Slow;
|
||||
VP8LFastSLog2Slow = FastSLog2Slow;
|
||||
|
@ -65,6 +65,11 @@ typedef void (*VP8LCollectColorBlueTransformsFunc)(
|
||||
const uint32_t* const argb);
|
||||
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
|
||||
|
||||
// Signature of a routine that fills 'histo' from the pixels in 'argb'.
// Implementations visible in this change walk rows tile_y_offset..all_y_max-1
// and columns tile_x_offset..all_x_max-1, addressing pixels as
// argb[y * xsize + x], and increment one 'histo' entry per pixel using the
// 'green_to_red' multiplier.
// NOTE(review): 'histo' is indexed with 8-bit values, so it presumably needs
// at least 256 entries -- confirm against the callers.
typedef void (*VP8LCollectColorRedTransformsFunc)(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, int green_to_red, int* histo, const uint32_t* const argb);
|
||||
// Global function pointer through which the collector is called; it is
// assigned by the DSP initialization code.
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
|
||||
|
||||
// Expose some C-only fallback functions
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* const m,
|
||||
uint32_t* data, int num_pixels);
|
||||
|
@ -424,6 +424,51 @@ static void CollectColorBlueTransforms(
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the transformed red value of a single pixel: takes 'argb' shifted
// right by 16, subtracts ColorTransformDelta(green_to_red, argb >> 8) and
// returns the low 8 bits of the result.
// NOTE(review): ColorTransformDelta is defined outside this view; presumably
// it narrows its arguments to 8 bits so that only the green byte of
// 'argb >> 8' takes part -- confirm.
static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
|
||||
uint32_t argb) {
|
||||
// Green byte is now in bits 0..7 (the higher bits are not masked here).
const uint32_t green = argb >> 8;
|
||||
// Red byte is now in bits 0..7 (bits 8..15 still carry the top byte).
uint32_t new_red = argb >> 16;
|
||||
new_red -= ColorTransformDelta(green_to_red, green);
|
||||
// Only the low byte is kept; unsigned wrap-around above it is discarded.
return (new_red & 0xff);
|
||||
}
|
||||
|
||||
// MIPS DSPr2 version of the red-transform histogram collector.
// For every row all_y in [tile_y_offset, all_y_max) it visits
// (all_x_max - tile_x_offset) consecutive pixels starting at
// argb[all_y * xsize + tile_x_offset] and increments histo[v] once per pixel,
// where v is the 8-bit transformed red value. Pixels are handled two at a
// time in inline assembly; an odd trailing pixel goes through
// TransformColorRed().
// 'histo' is only incremented here, never cleared.
static void CollectColorRedTransforms(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, int green_to_red, int* histo, const uint32_t* const argb) {
|
||||
// Low 16 bits of green_to_red copied into both halfwords, so one paired
// halfword multiply applies the same factor to two pixels.
const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);
|
||||
// Index of the first pixel of the current row inside 'argb'.
int ix = tile_y_offset * xsize + tile_x_offset;
|
||||
int all_y;
|
||||
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
|
||||
// The cast drops 'const'; the pointer is only read from (lw and the
// dereference for the trailing pixel) and advanced by the asm block.
uint32_t* p_argb = (uint32_t*)&argb[ix];
|
||||
// Number of pixels to process in this row.
const int loop_cnt = all_x_max - tile_x_offset;
|
||||
int all_x;
|
||||
// Move to the next row now; p_argb already points at the current one.
ix += xsize;
|
||||
// One iteration per pair of pixels.
for (all_x = 0; all_x < (loop_cnt >> 1); ++all_x) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
// Instruction by instruction (semantics per the MIPS DSP ASE manual --
// NOTE(review): verify against the manual):
//   lw x2        : temp0 = pixel 0, temp1 = pixel 1
//   precrq.ph.w  : temp4 = upper halfwords of temp0 and temp1, i.e. the
//                  halfword holding the red byte of each pixel
//   ins          : temp1 = lower halfwords of both pixels (green in the
//                  high byte of each halfword)
//   shra.ph 8    : temp3 = green of each pixel, arithmetically shifted
//                  down into the low byte of its halfword
//   mul.ph       : temp2 = temp3 * gtr, per halfword
//   addiu        : p_argb advances by 8 bytes (two pixels)
//   shra.ph 5    : temp2 >>= 5, per halfword
//   subu.qb      : temp2 = temp4 - temp2, per byte
// After the block the low byte of each halfword of temp2 holds the
// transformed red value of one pixel.
__asm__ volatile (
|
||||
"lw %[temp0], 0(%[p_argb]) \n\t"
|
||||
"lw %[temp1], 4(%[p_argb]) \n\t"
|
||||
"precrq.ph.w %[temp4], %[temp0], %[temp1] \n\t"
|
||||
"ins %[temp1], %[temp0], 16, 16 \n\t"
|
||||
"shra.ph %[temp3], %[temp1], 8 \n\t"
|
||||
"mul.ph %[temp2], %[temp3], %[gtr] \n\t"
|
||||
"addiu %[p_argb], %[p_argb], 8 \n\t"
|
||||
"shra.ph %[temp2], %[temp2], 5 \n\t"
|
||||
"subu.qb %[temp2], %[temp4], %[temp2] \n\t"
|
||||
: [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
||||
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
|
||||
: [gtr]"r"(gtr)
|
||||
: "memory", "hi", "lo"
|
||||
);
|
||||
// Upper halfword: result for the first pixel of the pair.
++histo[(uint8_t)(temp2 >> 16)];
|
||||
// Lower halfword: result for the second pixel of the pair.
++histo[(uint8_t)temp2];
|
||||
}
|
||||
// Odd pixel count: the last pixel of the row is handled in plain C.
if (loop_cnt & 1) {
|
||||
++histo[TransformColorRed(green_to_red, *p_argb)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // WEBP_USE_MIPS_DSP_R2
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -446,6 +491,7 @@ void VP8LDspInitMIPSdspR2(void) {
|
||||
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
|
||||
VP8LTransformColor = TransformColor;
|
||||
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
|
||||
VP8LCollectColorRedTransforms = CollectColorRedTransforms;
|
||||
#endif // WEBP_USE_MIPS_DSP_R2
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user