From a987faedfa46e87f3af401501499b670f4900763 Mon Sep 17 00:00:00 2001 From: Pascal Massimino Date: Sat, 7 Feb 2015 02:13:26 -0800 Subject: [PATCH] MIPS: dspr2: added optimization for function GetResidualCost set/get residual C functions moved to new file in src/dsp mips32 version of GetResidualCost moved to new file Change-Id: I7cebb7933a89820ff28c187249a9181f281081d2 --- Android.mk | 3 + Makefile.vc | 3 + makefile.unix | 3 + src/dsp/Makefile.am | 3 + src/dsp/cost.c | 105 +++++++++++++++++++++++++++ src/dsp/cost_mips32.c | 141 +++++++++++++++++++++++++++++++++++++ src/dsp/cost_mips_dsp_r2.c | 112 +++++++++++++++++++++++++++++ src/dsp/dsp.h | 15 ++++ src/dsp/enc_mips32.c | 113 ----------------------------- src/enc/cost.c | 95 ------------------------- src/enc/cost.h | 17 +---- src/enc/frame.c | 3 +- src/enc/webpenc.c | 5 +- 13 files changed, 392 insertions(+), 226 deletions(-) create mode 100644 src/dsp/cost.c create mode 100644 src/dsp/cost_mips32.c create mode 100644 src/dsp/cost_mips_dsp_r2.c diff --git a/Android.mk b/Android.mk index bd53234b..93656190 100644 --- a/Android.mk +++ b/Android.mk @@ -38,6 +38,9 @@ LOCAL_SRC_FILES := \ src/dsp/argb.c \ src/dsp/argb_mips_dsp_r2.c \ src/dsp/argb_sse2.c \ + src/dsp/cost.c \ + src/dsp/cost_mips32.c \ + src/dsp/cost_mips_dsp_r2.c \ src/dsp/cpu.c \ src/dsp/dec.c \ src/dsp/dec_clip_tables.c \ diff --git a/Makefile.vc b/Makefile.vc index 12691e28..47abe665 100644 --- a/Makefile.vc +++ b/Makefile.vc @@ -219,6 +219,9 @@ DSP_ENC_OBJS = \ $(DIROBJ)\dsp\argb.obj \ $(DIROBJ)\dsp\argb_mips_dsp_r2.obj \ $(DIROBJ)\dsp\argb_sse2.obj \ + $(DIROBJ)\dsp\cost.obj \ + $(DIROBJ)\dsp\cost_mips32.obj \ + $(DIROBJ)\dsp\cost_mips_dsp_r2.obj \ $(DIROBJ)\dsp\enc.obj \ $(DIROBJ)\dsp\enc_avx2.obj \ $(DIROBJ)\dsp\enc_mips32.obj \ diff --git a/makefile.unix b/makefile.unix index 20fbf8df..fc6e283d 100644 --- a/makefile.unix +++ b/makefile.unix @@ -143,6 +143,9 @@ DSP_ENC_OBJS = \ src/dsp/argb.o \ src/dsp/argb_mips_dsp_r2.o \ src/dsp/argb_sse2.o \ + 
src/dsp/cost.o \ + src/dsp/cost_mips32.o \ + src/dsp/cost_mips_dsp_r2.o \ src/dsp/enc.o \ src/dsp/enc_avx2.o \ src/dsp/enc_mips32.o \ diff --git a/src/dsp/Makefile.am b/src/dsp/Makefile.am index 04231376..307c4409 100644 --- a/src/dsp/Makefile.am +++ b/src/dsp/Makefile.am @@ -41,6 +41,9 @@ COMMON_SOURCES += yuv_mips_dsp_r2.c ENC_SOURCES = ENC_SOURCES += argb.c ENC_SOURCES += argb_mips_dsp_r2.c +ENC_SOURCES += cost.c +ENC_SOURCES += cost_mips32.c +ENC_SOURCES += cost_mips_dsp_r2.c ENC_SOURCES += enc.c ENC_SOURCES += enc_mips32.c ENC_SOURCES += enc_mips_dsp_r2.c diff --git a/src/dsp/cost.c b/src/dsp/cost.c new file mode 100644 index 00000000..f7fc8440 --- /dev/null +++ b/src/dsp/cost.c @@ -0,0 +1,105 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "./dsp.h" +#include "../enc/cost.h" + +//------------------------------------------------------------------------------ +// Mode costs + +static int GetResidualCost(int ctx0, const VP8Residual* const res) { + int n = res->first; + // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 + const int p0 = res->prob[n][ctx0][0]; + const uint16_t* t = res->cost[n][ctx0]; + // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0 + // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll + // be missing during the loop. + int cost = (ctx0 == 0) ? 
VP8BitCost(1, p0) : 0; + + if (res->last < 0) { + return VP8BitCost(0, p0); + } + for (; n < res->last; ++n) { + const int v = abs(res->coeffs[n]); + const int b = VP8EncBands[n + 1]; + const int ctx = (v >= 2) ? 2 : v; + cost += VP8LevelCost(t, v); + t = res->cost[b][ctx]; + } + // Last coefficient is always non-zero + { + const int v = abs(res->coeffs[n]); + assert(v != 0); + cost += VP8LevelCost(t, v); + if (n < 15) { + const int b = VP8EncBands[n + 1]; + const int ctx = (v == 1) ? 1 : 2; + const int last_p0 = res->prob[b][ctx][0]; + cost += VP8BitCost(0, last_p0); + } + } + return cost; +} + +static void SetResidualCoeffs(const int16_t* const coeffs, + VP8Residual* const res) { + int n; + res->last = -1; + assert(res->first == 0 || coeffs[0] == 0); + for (n = 15; n >= 0; --n) { + if (coeffs[n]) { + res->last = n; + break; + } + } + res->coeffs = coeffs; +} + +//------------------------------------------------------------------------------ +// init function + +VP8GetResidualCostFunc VP8GetResidualCost; +VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; + +extern void VP8EncDspCostInitMIPS32(void); +extern void VP8EncDspCostInitMIPSdspR2(void); + +#if defined(WEBP_USE_SSE2) +extern void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs, + VP8Residual* const res); +#endif // WEBP_USE_SSE2 + +void VP8EncDspCostInit(void) { + VP8GetResidualCost = GetResidualCost; + VP8SetResidualCoeffs = SetResidualCoeffs; + + // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
+ if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_MIPS32) + if (VP8GetCPUInfo(kMIPS32)) { + VP8EncDspCostInitMIPS32(); + } +#endif +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + VP8EncDspCostInitMIPSdspR2(); + } +#endif +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + VP8SetResidualCoeffs = VP8SetResidualCoeffsSSE2; + } +#endif + } +} + +//------------------------------------------------------------------------------ diff --git a/src/dsp/cost_mips32.c b/src/dsp/cost_mips32.c new file mode 100644 index 00000000..48f7395d --- /dev/null +++ b/src/dsp/cost_mips32.c @@ -0,0 +1,141 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Author: Djordje Pesut (djordje.pesut@imgtec.com) + +#include "./dsp.h" + +#if defined(WEBP_USE_MIPS32) + +#include "../enc/cost.h" + +static int GetResidualCost(int ctx0, const VP8Residual* const res) { + int n = res->first; + // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 + int p0 = res->prob[n][ctx0][0]; + const uint16_t* t = res->cost[n][ctx0]; + int cost; + const int const_2 = 2; + const int const_255 = 255; + const int const_max_level = MAX_VARIABLE_LEVEL; + int res_cost; + int res_prob; + int res_coeffs; + int res_last; + int v_reg; + int b_reg; + int ctx_reg; + int cost_add, temp_1, temp_2, temp_3; + + if (res->last < 0) { + return VP8BitCost(0, p0); + } + + cost = (ctx0 == 0) ? 
VP8BitCost(1, p0) : 0; + + res_cost = (int)res->cost; + res_prob = (int)res->prob; + res_coeffs = (int)res->coeffs; + res_last = (int)res->last; + + __asm__ volatile( + ".set push \n\t" + ".set noreorder \n\t" + + "sll %[temp_1], %[n], 1 \n\t" + "addu %[res_coeffs], %[res_coeffs], %[temp_1] \n\t" + "slt %[temp_2], %[n], %[res_last] \n\t" + "bnez %[temp_2], 1f \n\t" + " li %[cost_add], 0 \n\t" + "b 2f \n\t" + " nop \n\t" + "1: \n\t" + "lh %[v_reg], 0(%[res_coeffs]) \n\t" + "addu %[b_reg], %[n], %[VP8EncBands] \n\t" + "move %[temp_1], %[const_max_level] \n\t" + "addu %[cost], %[cost], %[cost_add] \n\t" + "negu %[temp_2], %[v_reg] \n\t" + "slti %[temp_3], %[v_reg], 0 \n\t" + "movn %[v_reg], %[temp_2], %[temp_3] \n\t" + "lbu %[b_reg], 1(%[b_reg]) \n\t" + "li %[cost_add], 0 \n\t" + + "sltiu %[temp_3], %[v_reg], 2 \n\t" + "move %[ctx_reg], %[v_reg] \n\t" + "movz %[ctx_reg], %[const_2], %[temp_3] \n\t" + // cost += VP8LevelCost(t, v); + "slt %[temp_3], %[v_reg], %[const_max_level] \n\t" + "movn %[temp_1], %[v_reg], %[temp_3] \n\t" + "sll %[temp_2], %[v_reg], 1 \n\t" + "addu %[temp_2], %[temp_2], %[VP8LevelFixedCosts] \n\t" + "lhu %[temp_2], 0(%[temp_2]) \n\t" + "sll %[temp_1], %[temp_1], 1 \n\t" + "addu %[temp_1], %[temp_1], %[t] \n\t" + "lhu %[temp_3], 0(%[temp_1]) \n\t" + "addu %[cost], %[cost], %[temp_2] \n\t" + + // t = res->cost[b][ctx]; + "sll %[temp_1], %[ctx_reg], 7 \n\t" + "sll %[temp_2], %[ctx_reg], 3 \n\t" + "addu %[cost], %[cost], %[temp_3] \n\t" + "addu %[temp_1], %[temp_1], %[temp_2] \n\t" + "sll %[temp_2], %[b_reg], 3 \n\t" + "sll %[temp_3], %[b_reg], 5 \n\t" + "sub %[temp_2], %[temp_3], %[temp_2] \n\t" + "sll %[temp_3], %[temp_2], 4 \n\t" + "addu %[temp_1], %[temp_1], %[temp_3] \n\t" + "addu %[temp_2], %[temp_2], %[res_cost] \n\t" + "addiu %[n], %[n], 1 \n\t" + "addu %[t], %[temp_1], %[temp_2] \n\t" + "slt %[temp_1], %[n], %[res_last] \n\t" + "bnez %[temp_1], 1b \n\t" + " addiu %[res_coeffs], %[res_coeffs], 2 \n\t" + "2: \n\t" + + ".set pop \n\t" + : 
[cost]"+r"(cost), [t]"+r"(t), [n]"+r"(n), [v_reg]"=&r"(v_reg), + [ctx_reg]"=&r"(ctx_reg), [b_reg]"=&r"(b_reg), [cost_add]"=&r"(cost_add), + [temp_1]"=&r"(temp_1), [temp_2]"=&r"(temp_2), [temp_3]"=&r"(temp_3) + : [const_2]"r"(const_2), [const_255]"r"(const_255), [res_last]"r"(res_last), + [VP8EntropyCost]"r"(VP8EntropyCost), [VP8EncBands]"r"(VP8EncBands), + [const_max_level]"r"(const_max_level), [res_prob]"r"(res_prob), + [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_coeffs]"r"(res_coeffs), + [res_cost]"r"(res_cost) + : "memory" + ); + + // Last coefficient is always non-zero + { + const int v = abs(res->coeffs[n]); + assert(v != 0); + cost += VP8LevelCost(t, v); + if (n < 15) { + const int b = VP8EncBands[n + 1]; + const int ctx = (v == 1) ? 1 : 2; + const int last_p0 = res->prob[b][ctx][0]; + cost += VP8BitCost(0, last_p0); + } + } + return cost; +} + +#endif // WEBP_USE_MIPS32 + +//------------------------------------------------------------------------------ +// Entry point + +extern WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void); + +void VP8EncDspCostInitMIPS32(void) { +#if defined(WEBP_USE_MIPS32) + VP8GetResidualCost = GetResidualCost; +#endif // WEBP_USE_MIPS32 +} + +//------------------------------------------------------------------------------ diff --git a/src/dsp/cost_mips_dsp_r2.c b/src/dsp/cost_mips_dsp_r2.c new file mode 100644 index 00000000..f388b866 --- /dev/null +++ b/src/dsp/cost_mips_dsp_r2.c @@ -0,0 +1,112 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+// ----------------------------------------------------------------------------- +// +// Author: Djordje Pesut (djordje.pesut@imgtec.com) + +#include "./dsp.h" + +#if defined(WEBP_USE_MIPS_DSP_R2) + +#include "../enc/cost.h" + +static int GetResidualCost(int ctx0, const VP8Residual* const res) { + int temp0, temp1, temp2; + int v_reg, b_reg, ctx_reg; + int n = res->first; + // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 + int p0 = res->prob[n][ctx0][0]; + const uint16_t* t = res->cost[n][ctx0]; + // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0 + // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll + // be missing during the loop. + int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0; + int res_cost = (int)res->cost; + int res_coeffs = (int)res->coeffs; + int res_last = (int)res->last; + const int const_max_level = MAX_VARIABLE_LEVEL; + const int const_2 = 2; + const int const_408 = 408; + int mult_136_408 = 136; + + if (res->last < 0) { + return VP8BitCost(0, p0); + } + + __asm__ volatile( + ".set push \n\t" + ".set noreorder \n\t" + "subu %[temp1], %[res_last], %[n] \n\t" + "blez %[temp1], 2f \n\t" + " ins %[mult_136_408], %[const_408], 16, 16 \n\t" + "1: \n\t" + "sll %[temp0], %[n], 1 \n\t" + "lhx %[v_reg], %[temp0](%[res_coeffs]) \n\t" + "addiu %[n], %[n], 1 \n\t" + "absq_s.w %[v_reg], %[v_reg] \n\t" + "lbux %[b_reg], %[n](%[VP8EncBands]) \n\t" + "sltiu %[temp2], %[v_reg], 2 \n\t" + "move %[ctx_reg], %[v_reg] \n\t" + "movz %[ctx_reg], %[const_2], %[temp2] \n\t" + "sll %[temp1], %[v_reg], 1 \n\t" + "lhx %[temp1], %[temp1](%[VP8LevelFixedCosts]) \n\t" + "slt %[temp2], %[v_reg], %[const_max_level] \n\t" + "ins %[ctx_reg], %[b_reg], 16, 16 \n\t" + "movz %[v_reg], %[const_max_level], %[temp2] \n\t" + "mul.ph %[temp0], %[ctx_reg], %[mult_136_408] \n\t" + "addu %[cost], %[cost], %[temp1] \n\t" + "sll %[v_reg], %[v_reg], 1 \n\t" + "lhx %[temp2], %[v_reg](%[t]) \n\t" + "ext %[temp1], %[temp0], 0, 
16 \n\t" + "ext %[temp0], %[temp0], 16, 16 \n\t" + "addu %[cost], %[cost], %[temp2] \n\t" + "addu %[temp1], %[temp1], %[res_cost] \n\t" + "bne %[n], %[res_last], 1b \n\t" + " addu %[t], %[temp0], %[temp1] \n\t" + "2: \n\t" + ".set pop \n\t" + : [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg), + [ctx_reg]"=&r"(ctx_reg), [b_reg]"=&r"(b_reg), [temp0]"=&r"(temp0), + [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), + [mult_136_408]"+&r"(mult_136_408) + : [const_2]"r"(const_2), [res_last]"r"(res_last), + [VP8EncBands]"r"(VP8EncBands), [const_max_level]"r"(const_max_level), + [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_cost]"r"(res_cost), + [const_408]"r"(const_408), [res_coeffs]"r"(res_coeffs) + : "memory" + ); + + // Last coefficient is always non-zero + { + const int v = abs(res->coeffs[n]); + assert(v != 0); + cost += VP8LevelCost(t, v); + if (n < 15) { + const int b = VP8EncBands[n + 1]; + const int ctx = (v == 1) ? 1 : 2; + const int last_p0 = res->prob[b][ctx][0]; + cost += VP8BitCost(0, last_p0); + } + } + return cost; +} + +#endif // WEBP_USE_MIPS_DSP_R2 + +//------------------------------------------------------------------------------ +// Entry point + +extern WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPSdspR2(void); + +void VP8EncDspCostInitMIPSdspR2(void) { +#if defined(WEBP_USE_MIPS_DSP_R2) + VP8GetResidualCost = GetResidualCost; +#endif // WEBP_USE_MIPS_DSP_R2 +} + +//------------------------------------------------------------------------------ diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index c4809fec..d92e1ae7 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -176,6 +176,21 @@ void VP8LSetHistogramData(const int distribution[MAX_COEFF_THRESH + 1], // must be called before using any of the above WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void); +//------------------------------------------------------------------------------ +// cost functions (encoding) + +struct VP8Residual; +typedef void (*VP8SetResidualCoeffsFunc)(const 
int16_t* const coeffs, + struct VP8Residual* const res); +extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; + +// Cost calculation function. +typedef int (*VP8GetResidualCostFunc)(int ctx0, + const struct VP8Residual* const res); +extern VP8GetResidualCostFunc VP8GetResidualCost; + +void VP8EncDspCostInit(void); // must be called first + //------------------------------------------------------------------------------ // Decoding diff --git a/src/dsp/enc_mips32.c b/src/dsp/enc_mips32.c index 78a6f961..b5b51fb1 100644 --- a/src/dsp/enc_mips32.c +++ b/src/dsp/enc_mips32.c @@ -513,119 +513,6 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { #undef VERTICAL_PASS #undef HORIZONTAL_PASS -// Forward declaration. -extern int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res); - -int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res) { - int n = res->first; - // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 - int p0 = res->prob[n][ctx0][0]; - const uint16_t* t = res->cost[n][ctx0]; - int cost; - const int const_2 = 2; - const int const_255 = 255; - const int const_max_level = MAX_VARIABLE_LEVEL; - int res_cost; - int res_prob; - int res_coeffs; - int res_last; - int v_reg; - int b_reg; - int ctx_reg; - int cost_add, temp_1, temp_2, temp_3; - - if (res->last < 0) { - return VP8BitCost(0, p0); - } - - cost = (ctx0 == 0) ? 
VP8BitCost(1, p0) : 0; - - res_cost = (int)res->cost; - res_prob = (int)res->prob; - res_coeffs = (int)res->coeffs; - res_last = (int)res->last; - - __asm__ volatile( - ".set push \n\t" - ".set noreorder \n\t" - - "sll %[temp_1], %[n], 1 \n\t" - "addu %[res_coeffs], %[res_coeffs], %[temp_1] \n\t" - "slt %[temp_2], %[n], %[res_last] \n\t" - "bnez %[temp_2], 1f \n\t" - " li %[cost_add], 0 \n\t" - "b 2f \n\t" - " nop \n\t" - "1: \n\t" - "lh %[v_reg], 0(%[res_coeffs]) \n\t" - "addu %[b_reg], %[n], %[VP8EncBands] \n\t" - "move %[temp_1], %[const_max_level] \n\t" - "addu %[cost], %[cost], %[cost_add] \n\t" - "negu %[temp_2], %[v_reg] \n\t" - "slti %[temp_3], %[v_reg], 0 \n\t" - "movn %[v_reg], %[temp_2], %[temp_3] \n\t" - "lbu %[b_reg], 1(%[b_reg]) \n\t" - "li %[cost_add], 0 \n\t" - - "sltiu %[temp_3], %[v_reg], 2 \n\t" - "move %[ctx_reg], %[v_reg] \n\t" - "movz %[ctx_reg], %[const_2], %[temp_3] \n\t" - // cost += VP8LevelCost(t, v); - "slt %[temp_3], %[v_reg], %[const_max_level] \n\t" - "movn %[temp_1], %[v_reg], %[temp_3] \n\t" - "sll %[temp_2], %[v_reg], 1 \n\t" - "addu %[temp_2], %[temp_2], %[VP8LevelFixedCosts] \n\t" - "lhu %[temp_2], 0(%[temp_2]) \n\t" - "sll %[temp_1], %[temp_1], 1 \n\t" - "addu %[temp_1], %[temp_1], %[t] \n\t" - "lhu %[temp_3], 0(%[temp_1]) \n\t" - "addu %[cost], %[cost], %[temp_2] \n\t" - - // t = res->cost[b][ctx]; - "sll %[temp_1], %[ctx_reg], 7 \n\t" - "sll %[temp_2], %[ctx_reg], 3 \n\t" - "addu %[cost], %[cost], %[temp_3] \n\t" - "addu %[temp_1], %[temp_1], %[temp_2] \n\t" - "sll %[temp_2], %[b_reg], 3 \n\t" - "sll %[temp_3], %[b_reg], 5 \n\t" - "sub %[temp_2], %[temp_3], %[temp_2] \n\t" - "sll %[temp_3], %[temp_2], 4 \n\t" - "addu %[temp_1], %[temp_1], %[temp_3] \n\t" - "addu %[temp_2], %[temp_2], %[res_cost] \n\t" - "addiu %[n], %[n], 1 \n\t" - "addu %[t], %[temp_1], %[temp_2] \n\t" - "slt %[temp_1], %[n], %[res_last] \n\t" - "bnez %[temp_1], 1b \n\t" - " addiu %[res_coeffs], %[res_coeffs], 2 \n\t" - "2: \n\t" - - ".set pop \n\t" - : 
[cost]"+r"(cost), [t]"+r"(t), [n]"+r"(n), [v_reg]"=&r"(v_reg), - [ctx_reg]"=&r"(ctx_reg), [b_reg]"=&r"(b_reg), [cost_add]"=&r"(cost_add), - [temp_1]"=&r"(temp_1), [temp_2]"=&r"(temp_2), [temp_3]"=&r"(temp_3) - : [const_2]"r"(const_2), [const_255]"r"(const_255), [res_last]"r"(res_last), - [VP8EntropyCost]"r"(VP8EntropyCost), [VP8EncBands]"r"(VP8EncBands), - [const_max_level]"r"(const_max_level), [res_prob]"r"(res_prob), - [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_coeffs]"r"(res_coeffs), - [res_cost]"r"(res_cost) - : "memory" - ); - - // Last coefficient is always non-zero - { - const int v = abs(res->coeffs[n]); - assert(v != 0); - cost += VP8LevelCost(t, v); - if (n < 15) { - const int b = VP8EncBands[n + 1]; - const int ctx = (v == 1) ? 1 : 2; - const int last_p0 = res->prob[b][ctx][0]; - cost += VP8BitCost(0, last_p0); - } - } - return cost; -} - #if !defined(WORK_AROUND_GCC) #define GET_SSE_INNER(A, B, C, D) \ diff --git a/src/enc/cost.c b/src/enc/cost.c index 9d2cc017..c895555c 100644 --- a/src/enc/cost.c +++ b/src/enc/cost.c @@ -486,66 +486,6 @@ const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = { { 305, 1167, 1358, 899, 1587, 1587, 987, 1988, 1332, 501 } } }; -//------------------------------------------------------------------------------ -// Mode costs - -static int GetResidualCost(int ctx0, const VP8Residual* const res) { - int n = res->first; - // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 - const int p0 = res->prob[n][ctx0][0]; - const uint16_t* t = res->cost[n][ctx0]; - // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0 - // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll - // be missing during the loop. - int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0; - - if (res->last < 0) { - return VP8BitCost(0, p0); - } - for (; n < res->last; ++n) { - const int v = abs(res->coeffs[n]); - const int b = VP8EncBands[n + 1]; - const int ctx = (v >= 2) ? 
2 : v; - cost += VP8LevelCost(t, v); - t = res->cost[b][ctx]; - } - // Last coefficient is always non-zero - { - const int v = abs(res->coeffs[n]); - assert(v != 0); - cost += VP8LevelCost(t, v); - if (n < 15) { - const int b = VP8EncBands[n + 1]; - const int ctx = (v == 1) ? 1 : 2; - const int last_p0 = res->prob[b][ctx][0]; - cost += VP8BitCost(0, last_p0); - } - } - return cost; -} - -//------------------------------------------------------------------------------ -// init function - -#if defined(WEBP_USE_MIPS32) -extern int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res); -#endif // WEBP_USE_MIPS32 - -// TODO(skal): this, and GetResidualCost(), should probably go somewhere -// under src/dsp/ at some point. -VP8GetResidualCostFunc VP8GetResidualCost; - -void VP8GetResidualCostInit(void) { - VP8GetResidualCost = GetResidualCost; - if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_MIPS32) - if (VP8GetCPUInfo(kMIPS32)) { - VP8GetResidualCost = VP8GetResidualCostMIPS32; - } -#endif - } -} - //------------------------------------------------------------------------------ // helper functions for residuals struct VP8Residual. 
@@ -558,41 +498,6 @@ void VP8InitResidual(int first, int coeff_type, res->first = first; } -static void SetResidualCoeffs(const int16_t* const coeffs, - VP8Residual* const res) { - int n; - res->last = -1; - assert(res->first == 0 || coeffs[0] == 0); - for (n = 15; n >= 0; --n) { - if (coeffs[n]) { - res->last = n; - break; - } - } - res->coeffs = coeffs; -} - -//------------------------------------------------------------------------------ -// init function - -#if defined(WEBP_USE_SSE2) -extern void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs, - VP8Residual* const res); -#endif // WEBP_USE_SSE2 - -VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; - -void VP8SetResidualCoeffsInit(void) { - VP8SetResidualCoeffs = SetResidualCoeffs; - if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - VP8SetResidualCoeffs = VP8SetResidualCoeffsSSE2; - } -#endif - } -} - //------------------------------------------------------------------------------ // Mode costs diff --git a/src/enc/cost.h b/src/enc/cost.h index 4e558952..7f44bb79 100644 --- a/src/enc/cost.h +++ b/src/enc/cost.h @@ -24,7 +24,8 @@ extern "C" { // On-the-fly info about the current set of residuals. Handy to avoid // passing zillions of params. -typedef struct { +typedef struct VP8Residual VP8Residual; +struct VP8Residual { int first; int last; const int16_t* coeffs; @@ -33,17 +34,11 @@ typedef struct { ProbaArray* prob; StatsArray* stats; CostArray* cost; -} VP8Residual; +}; void VP8InitResidual(int first, int coeff_type, VP8Encoder* const enc, VP8Residual* const res); -typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs, - VP8Residual* const res); -extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; - -void VP8SetResidualCoeffsInit(void); // must be called first - int VP8RecordCoeffs(int ctx, const VP8Residual* const res); // approximate cost per level: @@ -55,12 +50,6 @@ static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) { return !bit ? 
VP8EntropyCost[proba] : VP8EntropyCost[255 - proba]; } -// Cost calculation function. -typedef int (*VP8GetResidualCostFunc)(int ctx0, const VP8Residual* const res); -extern VP8GetResidualCostFunc VP8GetResidualCost; - -void VP8GetResidualCostInit(void); // must be called first - // Level cost calculations extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2]; void VP8CalculateLevelCosts(VP8Proba* const proba); diff --git a/src/enc/frame.c b/src/enc/frame.c index cdf1dabf..979ed0b4 100644 --- a/src/enc/frame.c +++ b/src/enc/frame.c @@ -14,8 +14,9 @@ #include <string.h> #include <math.h> -#include "./vp8enci.h" #include "./cost.h" +#include "./vp8enci.h" +#include "../dsp/dsp.h" #include "../webp/format_constants.h" // RIFF constants #define SEGMENT_VISU 0 diff --git a/src/enc/webpenc.c b/src/enc/webpenc.c index dbc07065..b208fcd4 100644 --- a/src/enc/webpenc.c +++ b/src/enc/webpenc.c @@ -16,9 +16,9 @@ #include <string.h> #include <math.h> +#include "./cost.h" #include "./vp8enci.h" #include "./vp8li.h" -#include "./cost.h" #include "../utils/utils.h" // #define PRINT_MEMORY_INFO @@ -225,8 +225,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, ResetSegmentHeader(enc); ResetFilterHeader(enc); ResetBoundaryPredictions(enc); - VP8GetResidualCostInit(); - VP8SetResidualCoeffsInit(); + VP8EncDspCostInit(); VP8EncInitAlpha(enc); // lower quality means smaller output -> we modulate a little the page