From fbba5bc2c1b290049f8b2daaf32b0d34b4dd5c0f Mon Sep 17 00:00:00 2001
From: Pascal Massimino
Date: Mon, 12 Dec 2016 17:41:36 +0100
Subject: [PATCH] optimize predictor #1 in plain-C

For some reason, gcc has hard time inlining this one...

Also optimize predictor #0 and #1 for encoding, so we don't have
to call the generic pointers VP8LPredictors[...]

Change-Id: I1ff31e3b83874b53f84fe23487f644619fd61db9
---
 src/dsp/lossless.c     | 10 +++++-
 src/dsp/lossless_enc.c | 72 ++++++++++++++++++++++++------------------
 2 files changed, 51 insertions(+), 31 deletions(-)

diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
index f922dc3a..07230194 100644
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -168,7 +168,15 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
 }
 
 GENERATE_PREDICTOR_ADD(Predictor0, PredictorAdd0)
-GENERATE_PREDICTOR_ADD(Predictor1, PredictorAdd1)
+static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
+                          int num_pixels, uint32_t* out) {
+  int i;
+  uint32_t left = out[-1];
+  for (i = 0; i < num_pixels; ++i) {
+    out[i] = left = VP8LAddPixels(in[i], left);
+  }
+  (void)upper;
+}
 GENERATE_PREDICTOR_ADD(Predictor2, PredictorAdd2)
 GENERATE_PREDICTOR_ADD(Predictor3, PredictorAdd3)
 GENERATE_PREDICTOR_ADD(Predictor4, PredictorAdd4)
diff --git a/src/dsp/lossless_enc.c b/src/dsp/lossless_enc.c
index 7b18ab40..32ad5413 100644
--- a/src/dsp/lossless_enc.c
+++ b/src/dsp/lossless_enc.c
@@ -665,20 +665,32 @@ static void HistogramAdd(const VP8LHistogram* const a,
 
 //------------------------------------------------------------------------------
 
-GENERATE_PREDICTOR_SUB(VP8LPredictors[0], PredictorSub0)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[1], PredictorSub1)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[2], PredictorSub2)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[3], PredictorSub3)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[4], PredictorSub4)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[5], PredictorSub5)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[6], PredictorSub6)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[7], PredictorSub7)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[8], PredictorSub8)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[9], PredictorSub9)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[10], PredictorSub10)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[11], PredictorSub11)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[12], PredictorSub12)
-GENERATE_PREDICTOR_SUB(VP8LPredictors[13], PredictorSub13)
+static void PredictorSub0_C(const uint32_t* in, const uint32_t* upper,
+                            int num_pixels, uint32_t* out) {
+  int i;
+  for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], ARGB_BLACK);
+  (void)upper;
+}
+
+static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
+                            int num_pixels, uint32_t* out) {
+  int i;
+  for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], in[i - 1]);
+  (void)upper;
+}
+
+GENERATE_PREDICTOR_SUB(VP8LPredictors[2], PredictorSub2_C)
+GENERATE_PREDICTOR_SUB(VP8LPredictors[3], PredictorSub3_C)
+GENERATE_PREDICTOR_SUB(VP8LPredictors[4], PredictorSub4_C)
+GENERATE_PREDICTOR_SUB(VP8LPredictors[5], PredictorSub5_C)
+GENERATE_PREDICTOR_SUB(VP8LPredictors[6], PredictorSub6_C)
+GENERATE_PREDICTOR_SUB(VP8LPredictors[7], PredictorSub7_C)
+GENERATE_PREDICTOR_SUB(VP8LPredictors[8], PredictorSub8_C)
+GENERATE_PREDICTOR_SUB(VP8LPredictors[9], PredictorSub9_C)
+GENERATE_PREDICTOR_SUB(VP8LPredictors[10], PredictorSub10_C)
+GENERATE_PREDICTOR_SUB(VP8LPredictors[11], PredictorSub11_C)
+GENERATE_PREDICTOR_SUB(VP8LPredictors[12], PredictorSub12_C)
+GENERATE_PREDICTOR_SUB(VP8LPredictors[13], PredictorSub13_C)
 
 VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
 
@@ -739,22 +751,22 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
 
   VP8LVectorMismatch = VectorMismatch;
 
-  VP8LPredictorsSub[0] = PredictorSub0;
-  VP8LPredictorsSub[1] = PredictorSub1;
-  VP8LPredictorsSub[2] = PredictorSub2;
-  VP8LPredictorsSub[3] = PredictorSub3;
-  VP8LPredictorsSub[4] = PredictorSub4;
-  VP8LPredictorsSub[5] = PredictorSub5;
-  VP8LPredictorsSub[6] = PredictorSub6;
-  VP8LPredictorsSub[7] = PredictorSub7;
-  VP8LPredictorsSub[8] = PredictorSub8;
-  VP8LPredictorsSub[9] = PredictorSub9;
-  VP8LPredictorsSub[10] = PredictorSub10;
-  VP8LPredictorsSub[11] = PredictorSub11;
-  VP8LPredictorsSub[12] = PredictorSub12;
-  VP8LPredictorsSub[13] = PredictorSub13;
-  VP8LPredictorsSub[14] = PredictorSub0;  // <- padding security sentinels
-  VP8LPredictorsSub[15] = PredictorSub0;
+  VP8LPredictorsSub[0] = PredictorSub0_C;
+  VP8LPredictorsSub[1] = PredictorSub1_C;
+  VP8LPredictorsSub[2] = PredictorSub2_C;
+  VP8LPredictorsSub[3] = PredictorSub3_C;
+  VP8LPredictorsSub[4] = PredictorSub4_C;
+  VP8LPredictorsSub[5] = PredictorSub5_C;
+  VP8LPredictorsSub[6] = PredictorSub6_C;
+  VP8LPredictorsSub[7] = PredictorSub7_C;
+  VP8LPredictorsSub[8] = PredictorSub8_C;
+  VP8LPredictorsSub[9] = PredictorSub9_C;
+  VP8LPredictorsSub[10] = PredictorSub10_C;
+  VP8LPredictorsSub[11] = PredictorSub11_C;
+  VP8LPredictorsSub[12] = PredictorSub12_C;
+  VP8LPredictorsSub[13] = PredictorSub13_C;
+  VP8LPredictorsSub[14] = PredictorSub0_C;  // <- padding security sentinels
+  VP8LPredictorsSub[15] = PredictorSub0_C;
 
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {