diff --git a/src/dsp/alpha_processing.c b/src/dsp/alpha_processing.c index 69604338..dfead26d 100644 --- a/src/dsp/alpha_processing.c +++ b/src/dsp/alpha_processing.c @@ -220,6 +220,7 @@ void WebPMultRows(uint8_t* ptr, int stride, #define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24) #endif +#if !WEBP_NEON_OMIT_C_CODE static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first, int w, int h, int stride) { while (h-- > 0) { @@ -238,6 +239,7 @@ static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first, rgba += stride; } } +#endif // !WEBP_NEON_OMIT_C_CODE #undef MULTIPLIER #undef PREMULTIPLY @@ -287,6 +289,7 @@ static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444, #endif } +#if !WEBP_NEON_OMIT_C_CODE static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride, int width, int height, uint8_t* dst, int dst_stride) { @@ -341,6 +344,7 @@ static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) { int i; for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8; } +#endif // !WEBP_NEON_OMIT_C_CODE //------------------------------------------------------------------------------ // Simple channel manipulations. @@ -383,15 +387,16 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) { WebPMultARGBRow = WebPMultARGBRow_C; WebPMultRow = WebPMultRow_C; - WebPApplyAlphaMultiply = ApplyAlphaMultiply_C; WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b_C; + WebPPackRGB = PackRGB_C; +#if !WEBP_NEON_OMIT_C_CODE + WebPApplyAlphaMultiply = ApplyAlphaMultiply_C; WebPDispatchAlpha = DispatchAlpha_C; WebPDispatchAlphaToGreen = DispatchAlphaToGreen_C; WebPExtractAlpha = ExtractAlpha_C; WebPExtractGreen = ExtractGreen_C; - - WebPPackRGB = PackRGB_C; +#endif // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { @@ -405,16 +410,29 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) { #endif } #endif -#if defined(WEBP_USE_NEON) - if (VP8GetCPUInfo(kNEON)) { - WebPInitAlphaProcessingNEON(); - } -#endif #if defined(WEBP_USE_MIPS_DSP_R2) if (VP8GetCPUInfo(kMIPSdspR2)) { WebPInitAlphaProcessingMIPSdspR2(); } #endif } + +#if defined(WEBP_USE_NEON) + if (WEBP_NEON_OMIT_C_CODE || + (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { + WebPInitAlphaProcessingNEON(); + } +#endif + + assert(WebPMultARGBRow != NULL); + assert(WebPMultRow != NULL); + assert(WebPApplyAlphaMultiply != NULL); + assert(WebPApplyAlphaMultiply4444 != NULL); + assert(WebPDispatchAlpha != NULL); + assert(WebPDispatchAlphaToGreen != NULL); + assert(WebPExtractAlpha != NULL); + assert(WebPExtractGreen != NULL); + assert(WebPPackRGB != NULL); + alpha_processing_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/src/dsp/dec.c b/src/dsp/dec.c index 13e852bf..1036d129 100644 --- a/src/dsp/dec.c +++ b/src/dsp/dec.c @@ -11,6 +11,8 @@ // // Author: Skal (pascal.massimino@gmail.com) +#include + #include "src/dsp/dsp.h" #include "src/dec/vp8i_dec.h" #include "src/utils/utils.h" @@ -38,6 +40,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) { #define MUL1(a) ((((a) * 20091) >> 16) + (a)) #define MUL2(a) (((a) * 35468) >> 16) +#if !WEBP_NEON_OMIT_C_CODE static void TransformOne(const int16_t* in, uint8_t* dst) { int C[4 * 4], *tmp; int i; @@ -99,12 +102,14 @@ static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) { TransformOne(in + 16, dst + 4); } } +#endif // !WEBP_NEON_OMIT_C_CODE static void TransformUV(const int16_t* in, uint8_t* dst) { VP8Transform(in + 0 * 16, dst, 1); VP8Transform(in + 2 * 16, dst + 4 * BPS, 1); } +#if !WEBP_NEON_OMIT_C_CODE static void TransformDC(const int16_t* in, uint8_t* dst) { const int DC = in[0] + 4; int i, j; @@ -114,6 +119,7 @@ static void TransformDC(const int16_t* in, uint8_t* dst) { } } } +#endif // !WEBP_NEON_OMIT_C_CODE static void TransformDCUV(const int16_t* in, uint8_t* dst) { if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst); @@ -127,6 +133,7 @@ static void TransformDCUV(const int16_t* in, uint8_t* dst) { //------------------------------------------------------------------------------ // Paragraph 14.3 +#if !WEBP_NEON_OMIT_C_CODE static void TransformWHT(const int16_t* in, int16_t* out) { int tmp[16]; int i; @@ -153,6 +160,7 @@ static void TransformWHT(const int16_t* in, int16_t* out) { out += 64; } } +#endif // !WEBP_NEON_OMIT_C_CODE void (*VP8TransformWHT)(const int16_t* in, int16_t* out); @@ -161,6 +169,7 @@ void (*VP8TransformWHT)(const int16_t* in, int16_t* out); #define DST(x, y) dst[(x) + (y) * BPS] +#if !WEBP_NEON_OMIT_C_CODE static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) { const uint8_t* top = dst - BPS; const uint8_t* const clip0 = VP8kclip1 - top[-1]; @@ -233,6 +242,7 @@ static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples Put16(0x80, dst); } +#endif // !WEBP_NEON_OMIT_C_CODE VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES]; @@ -242,6 +252,7 @@ VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES]; #define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2)) #define AVG2(a, b) (((a) + (b) + 1) >> 1) +#if !WEBP_NEON_OMIT_C_CODE static void VE4(uint8_t* dst) { // vertical const uint8_t* top = dst - BPS; const uint8_t vals[4] = { @@ -255,6 +266,7 @@ static void VE4(uint8_t* dst) { // vertical memcpy(dst + i * BPS, vals, sizeof(vals)); } } +#endif // !WEBP_NEON_OMIT_C_CODE static void HE4(uint8_t* dst) { // horizontal const int A = dst[-1 - BPS]; @@ -268,6 +280,7 @@ static void HE4(uint8_t* dst) { // horizontal WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E)); } +#if !WEBP_NEON_OMIT_C_CODE static void DC4(uint8_t* dst) { // DC uint32_t dc = 4; int i; @@ -312,6 +325,7 @@ static void LD4(uint8_t* dst) { // Down-Left DST(3, 2) = DST(2, 3) = AVG3(F, G, H); DST(3, 3) = AVG3(G, H, H); } +#endif // !WEBP_NEON_OMIT_C_CODE static void VR4(uint8_t* dst) { // Vertical-Right const int I = dst[-1 + 0 * BPS]; @@ -404,6 +418,7 @@ VP8PredFunc VP8PredLuma4[NUM_BMODES]; //------------------------------------------------------------------------------ // Chroma +#if !WEBP_NEON_OMIT_C_CODE static void VE8uv(uint8_t* dst) { // vertical int j; for (j = 0; j < 8; ++j) { @@ -457,12 +472,14 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing Put8x8uv(0x80, dst); } +#endif // !WEBP_NEON_OMIT_C_CODE VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES]; //------------------------------------------------------------------------------ // Edge filtering functions +#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC // 4 pixels in, 2 pixels out static WEBP_INLINE void do_filter2(uint8_t* p, int step) { const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; @@ -507,12 +524,16 @@ static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) { const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh); } +#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC +#if !WEBP_NEON_OMIT_C_CODE static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) { const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step]; return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t); } +#endif // !WEBP_NEON_OMIT_C_CODE +#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC static WEBP_INLINE int needs_filter2(const uint8_t* p, int step, int t, int it) { const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step]; @@ -523,10 +544,12 @@ static WEBP_INLINE int needs_filter2(const uint8_t* p, VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it && VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it; } +#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC //------------------------------------------------------------------------------ // Simple In-loop filtering (Paragraph 15.2) +#if !WEBP_NEON_OMIT_C_CODE static void SimpleVFilter16(uint8_t* p, int stride, int thresh) { int i; const int thresh2 = 2 * thresh + 1; @@ -562,10 +585,12 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) { SimpleHFilter16(p, stride, thresh); } } +#endif // !WEBP_NEON_OMIT_C_CODE //------------------------------------------------------------------------------ // Complex In-loop filtering (Paragraph 15.3) +#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC static WEBP_INLINE void FilterLoop26(uint8_t* p, int hstride, int vstride, int size, int thresh, int ithresh, int hev_thresh) { @@ -597,7 +622,9 @@ static WEBP_INLINE void FilterLoop24(uint8_t* p, p += vstride; } } +#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC +#if !WEBP_NEON_OMIT_C_CODE // on macroblock edges static void VFilter16(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) { @@ -618,7 +645,9 @@ static void VFilter16i(uint8_t* p, int stride, FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh); } } +#endif // !WEBP_NEON_OMIT_C_CODE +#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC static void HFilter16i(uint8_t* p, int stride, int thresh, int ithresh, int hev_thresh) { int k; @@ -627,31 +656,40 @@ static void HFilter16i(uint8_t* p, int stride, FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh); } } +#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC +#if !WEBP_NEON_OMIT_C_CODE // 8-pixels wide variant, for chroma filtering static void VFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) { FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh); FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh); } +#endif // !WEBP_NEON_OMIT_C_CODE +#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC static void HFilter8(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) { FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh); FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh); } +#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC +#if !WEBP_NEON_OMIT_C_CODE static void VFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) { FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); } +#endif // !WEBP_NEON_OMIT_C_CODE +#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC static void HFilter8i(uint8_t* u, uint8_t* v, int stride, int thresh, int ithresh, int hev_thresh) { FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh); FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh); } +#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC //------------------------------------------------------------------------------ @@ -709,37 +747,48 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) { VP8InitClipTables(); +#if !WEBP_NEON_OMIT_C_CODE VP8TransformWHT = TransformWHT; VP8Transform = TransformTwo; - VP8TransformUV = TransformUV; VP8TransformDC = TransformDC; - VP8TransformDCUV = TransformDCUV; VP8TransformAC3 = TransformAC3; +#endif + VP8TransformUV = TransformUV; + VP8TransformDCUV = TransformDCUV; +#if !WEBP_NEON_OMIT_C_CODE VP8VFilter16 = VFilter16; + VP8VFilter16i = VFilter16i; VP8HFilter16 = HFilter16; VP8VFilter8 = VFilter8; - VP8HFilter8 = HFilter8; - VP8VFilter16i = VFilter16i; - VP8HFilter16i = HFilter16i; VP8VFilter8i = VFilter8i; - VP8HFilter8i = HFilter8i; VP8SimpleVFilter16 = SimpleVFilter16; VP8SimpleHFilter16 = SimpleHFilter16; VP8SimpleVFilter16i = SimpleVFilter16i; VP8SimpleHFilter16i = SimpleHFilter16i; +#endif +#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC + VP8HFilter16i = HFilter16i; + VP8HFilter8 = HFilter8; + VP8HFilter8i = HFilter8i; +#endif + +#if !WEBP_NEON_OMIT_C_CODE VP8PredLuma4[0] = DC4; VP8PredLuma4[1] = TM4; VP8PredLuma4[2] = VE4; - VP8PredLuma4[3] = HE4; VP8PredLuma4[4] = RD4; - VP8PredLuma4[5] = VR4; VP8PredLuma4[6] = LD4; +#endif + + VP8PredLuma4[3] = HE4; + VP8PredLuma4[5] = VR4; VP8PredLuma4[7] = VL4; VP8PredLuma4[8] = HD4; VP8PredLuma4[9] = HU4; +#if !WEBP_NEON_OMIT_C_CODE VP8PredLuma16[0] = DC16; VP8PredLuma16[1] = TM16; VP8PredLuma16[2] = VE16; @@ -755,6 +804,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) { VP8PredChroma8[4] = DC8uvNoTop; VP8PredChroma8[5] = DC8uvNoLeft; VP8PredChroma8[6] = DC8uvNoTopLeft; +#endif VP8DitherCombine8x8 = DitherCombine8x8; @@ -770,11 +820,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) { #endif } #endif -#if defined(WEBP_USE_NEON) - if (VP8GetCPUInfo(kNEON)) { - VP8DspInitNEON(); - } -#endif #if defined(WEBP_USE_MIPS32) if (VP8GetCPUInfo(kMIPS32)) { VP8DspInitMIPS32(); @@ -791,5 +836,57 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) { } #endif } + +#if defined(WEBP_USE_NEON) + if (WEBP_NEON_OMIT_C_CODE || + (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { + VP8DspInitNEON(); + } +#endif + + assert(VP8TransformWHT != NULL); + assert(VP8Transform != NULL); + assert(VP8TransformDC != NULL); + assert(VP8TransformAC3 != NULL); + assert(VP8TransformUV != NULL); + assert(VP8TransformDCUV != NULL); + assert(VP8VFilter16 != NULL); + assert(VP8HFilter16 != NULL); + assert(VP8VFilter8 != NULL); + assert(VP8HFilter8 != NULL); + assert(VP8VFilter16i != NULL); + assert(VP8HFilter16i != NULL); + assert(VP8VFilter8i != NULL); + assert(VP8HFilter8i != NULL); + assert(VP8SimpleVFilter16 != NULL); + assert(VP8SimpleHFilter16 != NULL); + assert(VP8SimpleVFilter16i != NULL); + assert(VP8SimpleHFilter16i != NULL); + assert(VP8PredLuma4[0] != NULL); + assert(VP8PredLuma4[1] != NULL); + assert(VP8PredLuma4[2] != NULL); + assert(VP8PredLuma4[3] != NULL); + assert(VP8PredLuma4[4] != NULL); + assert(VP8PredLuma4[5] != NULL); + assert(VP8PredLuma4[6] != NULL); + assert(VP8PredLuma4[7] != NULL); + assert(VP8PredLuma4[8] != NULL); + assert(VP8PredLuma4[9] != NULL); + assert(VP8PredLuma16[0] != NULL); + assert(VP8PredLuma16[1] != NULL); + assert(VP8PredLuma16[2] != NULL); + assert(VP8PredLuma16[3] != NULL); + assert(VP8PredLuma16[4] != NULL); + assert(VP8PredLuma16[5] != NULL); + assert(VP8PredLuma16[6] != NULL); + assert(VP8PredChroma8[0] != NULL); + assert(VP8PredChroma8[1] != NULL); + assert(VP8PredChroma8[2] != NULL); + assert(VP8PredChroma8[3] != NULL); + assert(VP8PredChroma8[4] != NULL); + assert(VP8PredChroma8[5] != NULL); + assert(VP8PredChroma8[6] != NULL); + assert(VP8DitherCombine8x8 != NULL); + dec_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index 25efc18f..d2001121 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -116,6 +116,22 @@ extern "C" { #endif /* EMSCRIPTEN */ +#ifndef WEBP_DSP_OMIT_C_CODE +#define WEBP_DSP_OMIT_C_CODE 1 +#endif + +#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE +#define WEBP_NEON_OMIT_C_CODE 1 +#else +#define WEBP_NEON_OMIT_C_CODE 0 +#endif + +#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__)) +#define WEBP_NEON_WORK_AROUND_GCC 1 +#else +#define WEBP_NEON_WORK_AROUND_GCC 0 +#endif + // This macro prevents thread_sanitizer from reporting known concurrent writes. #define WEBP_TSAN_IGNORE_FUNCTION #if defined(__has_feature) diff --git a/src/dsp/enc.c b/src/dsp/enc.c index bf59d5b8..1c807f1d 100644 --- a/src/dsp/enc.c +++ b/src/dsp/enc.c @@ -21,9 +21,11 @@ static WEBP_INLINE uint8_t clip_8b(int v) { return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; } +#if !WEBP_NEON_OMIT_C_CODE static WEBP_INLINE int clip_max(int v, int max) { return (v > max) ? max : v; } +#endif // !WEBP_NEON_OMIT_C_CODE //------------------------------------------------------------------------------ // Compute susceptibility based on DCT-coeff histograms: @@ -56,6 +58,7 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1], histo->last_non_zero = last_non_zero; } +#if !WEBP_NEON_OMIT_C_CODE static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred, int start_block, int end_block, VP8Histogram* const histo) { @@ -76,6 +79,7 @@ static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred, } VP8SetHistogramData(distribution, histo); } +#endif // !WEBP_NEON_OMIT_C_CODE //------------------------------------------------------------------------------ // run-time tables (~4k) @@ -100,6 +104,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) { //------------------------------------------------------------------------------ // Transforms (Paragraph 14.4) +#if !WEBP_NEON_OMIT_C_CODE + #define STORE(x, y, v) \ dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) @@ -176,6 +182,7 @@ static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) { out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); } } +#endif // !WEBP_NEON_OMIT_C_CODE static void FTransform2_C(const uint8_t* src, const uint8_t* ref, int16_t* out) { @@ -183,6 +190,7 @@ static void FTransform2_C(const uint8_t* src, const uint8_t* ref, VP8FTransform(src + 4, ref + 4, out + 16); } +#if !WEBP_NEON_OMIT_C_CODE static void FTransformWHT_C(const int16_t* in, int16_t* out) { // input is 12b signed int32_t tmp[16]; @@ -212,6 +220,7 @@ static void FTransformWHT_C(const int16_t* in, int16_t* out) { out[12 + i] = b3 >> 1; } } +#endif // !WEBP_NEON_OMIT_C_CODE #undef MUL #undef STORE @@ -524,6 +533,7 @@ static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) { //------------------------------------------------------------------------------ // Metric +#if !WEBP_NEON_OMIT_C_CODE static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, int w, int h) { int count = 0; @@ -551,6 +561,7 @@ static int SSE8x8_C(const uint8_t* a, const uint8_t* b) { static int SSE4x4_C(const uint8_t* a, const uint8_t* b) { return GetSSE(a, b, 4, 4); } +#endif // !WEBP_NEON_OMIT_C_CODE static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) { int k, x, y; @@ -572,6 +583,7 @@ static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) { // We try to match the spectral content (weighted) between source and // reconstructed samples. +#if !WEBP_NEON_OMIT_C_CODE // Hadamard transform // Returns the weighted sum of the absolute value of transformed coefficients. // w[] contains a row-major 4 by 4 symmetric matrix. @@ -627,6 +639,7 @@ static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b, } return D; } +#endif // !WEBP_NEON_OMIT_C_CODE //------------------------------------------------------------------------------ // Quantization @@ -663,6 +676,7 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16], return (last >= 0); } +#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC static int Quantize2Blocks_C(int16_t in[32], int16_t out[32], const VP8Matrix* const mtx) { int nz; @@ -670,6 +684,7 @@ static int Quantize2Blocks_C(int16_t in[32], int16_t out[32], nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1; return nz; } +#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC //------------------------------------------------------------------------------ // Block copy @@ -735,23 +750,29 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { InitTables(); // default C implementations - VP8CollectHistogram = CollectHistogram_C; +#if !WEBP_NEON_OMIT_C_CODE VP8ITransform = ITransform_C; VP8FTransform = FTransform_C; - VP8FTransform2 = FTransform2_C; VP8FTransformWHT = FTransformWHT_C; + VP8TDisto4x4 = Disto4x4_C; + VP8TDisto16x16 = Disto16x16_C; + VP8CollectHistogram = CollectHistogram_C; + VP8SSE16x16 = SSE16x16_C; + VP8SSE16x8 = SSE16x8_C; + VP8SSE8x8 = SSE8x8_C; + VP8SSE4x4 = SSE4x4_C; +#endif + +#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC + VP8EncQuantizeBlock = QuantizeBlock_C; + VP8EncQuantize2Blocks = Quantize2Blocks_C; +#endif + + VP8FTransform2 = FTransform2_C; VP8EncPredLuma4 = Intra4Preds_C; VP8EncPredLuma16 = Intra16Preds_C; VP8EncPredChroma8 = IntraChromaPreds_C; - VP8SSE16x16 = SSE16x16_C; - VP8SSE8x8 = SSE8x8_C; - VP8SSE16x8 = SSE16x8_C; - VP8SSE4x4 = SSE4x4_C; - VP8TDisto4x4 = Disto4x4_C; - VP8TDisto16x16 = Disto16x16_C; VP8Mean16x4 = Mean16x4_C; - VP8EncQuantizeBlock = QuantizeBlock_C; - VP8EncQuantize2Blocks = Quantize2Blocks_C; VP8EncQuantizeBlockWHT = QuantizeBlock_C; VP8Copy4x4 = Copy4x4_C; VP8Copy16x8 = Copy16x8_C; @@ -773,11 +794,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { VP8EncDspInitAVX2(); } #endif -#if defined(WEBP_USE_NEON) - if (VP8GetCPUInfo(kNEON)) { - VP8EncDspInitNEON(); - } -#endif #if defined(WEBP_USE_MIPS32) if (VP8GetCPUInfo(kMIPS32)) { VP8EncDspInitMIPS32(); @@ -794,5 +810,34 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { } #endif } + +#if defined(WEBP_USE_NEON) + if (WEBP_NEON_OMIT_C_CODE || + (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { + VP8EncDspInitNEON(); + } +#endif + + assert(VP8ITransform != NULL); + assert(VP8FTransform != NULL); + assert(VP8FTransformWHT != NULL); + assert(VP8TDisto4x4 != NULL); + assert(VP8TDisto16x16 != NULL); + assert(VP8CollectHistogram != NULL); + assert(VP8SSE16x16 != NULL); + assert(VP8SSE16x8 != NULL); + assert(VP8SSE8x8 != NULL); + assert(VP8SSE4x4 != NULL); + assert(VP8EncQuantizeBlock != NULL); + assert(VP8EncQuantize2Blocks != NULL); + assert(VP8FTransform2 != NULL); + assert(VP8EncPredLuma4 != NULL); + assert(VP8EncPredLuma16 != NULL); + assert(VP8EncPredChroma8 != NULL); + assert(VP8Mean16x4 != NULL); + assert(VP8EncQuantizeBlockWHT != NULL); + assert(VP8Copy4x4 != NULL); + assert(VP8Copy16x8 != NULL); + enc_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/src/dsp/filters.c b/src/dsp/filters.c index 7759126f..ca5f877d 100644 --- a/src/dsp/filters.c +++ b/src/dsp/filters.c @@ -28,6 +28,7 @@ assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \ (void)height; // Silence unused warning. +#if !WEBP_NEON_OMIT_C_CODE static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred, uint8_t* dst, int length, int inverse) { int i; @@ -112,6 +113,7 @@ static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in, out += stride; } } +#endif // !WEBP_NEON_OMIT_C_CODE //------------------------------------------------------------------------------ // Gradient filter. @@ -121,6 +123,7 @@ static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) { return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit } +#if !WEBP_NEON_OMIT_C_CODE static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in, int width, int height, int stride, int row, int num_rows, @@ -160,11 +163,13 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in, out += stride; } } +#endif // !WEBP_NEON_OMIT_C_CODE #undef SANITY_CHECK //------------------------------------------------------------------------------ +#if !WEBP_NEON_OMIT_C_CODE static void HorizontalFilter_C(const uint8_t* data, int width, int height, int stride, uint8_t* filtered_data) { DoHorizontalFilter_C(data, width, height, stride, 0, height, 0, @@ -180,7 +185,7 @@ static void GradientFilter_C(const uint8_t* data, int width, int height, int stride, uint8_t* filtered_data) { DoGradientFilter_C(data, width, height, stride, 0, height, 0, filtered_data); } - +#endif // !WEBP_NEON_OMIT_C_CODE //------------------------------------------------------------------------------ @@ -194,6 +199,7 @@ static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in, } } +#if !WEBP_NEON_OMIT_C_CODE static void VerticalUnfilter_C(const uint8_t* prev, const uint8_t* in, uint8_t* out, int width) { if (prev == NULL) { @@ -203,6 +209,7 @@ static void VerticalUnfilter_C(const uint8_t* prev, const uint8_t* in, for (i = 0; i < width; ++i) out[i] = prev[i] + in[i]; } } +#endif // !WEBP_NEON_OMIT_C_CODE static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in, uint8_t* out, int width) { @@ -238,14 +245,18 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) { if (filters_last_cpuinfo_used == VP8GetCPUInfo) return; WebPUnfilters[WEBP_FILTER_NONE] = NULL; +#if !WEBP_NEON_OMIT_C_CODE WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C; WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C; +#endif WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_C; WebPFilters[WEBP_FILTER_NONE] = NULL; +#if !WEBP_NEON_OMIT_C_CODE WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_C; WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_C; WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_C; +#endif if (VP8GetCPUInfo != NULL) { #if defined(WEBP_USE_SSE2) @@ -253,11 +264,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) { VP8FiltersInitSSE2(); } #endif -#if defined(WEBP_USE_NEON) - if (VP8GetCPUInfo(kNEON)) { - VP8FiltersInitNEON(); - } -#endif #if defined(WEBP_USE_MIPS_DSP_R2) if (VP8GetCPUInfo(kMIPSdspR2)) { VP8FiltersInitMIPSdspR2(); @@ -269,5 +275,20 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) { } #endif } + +#if defined(WEBP_USE_NEON) + if (WEBP_NEON_OMIT_C_CODE || + (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { + VP8FiltersInitNEON(); + } +#endif + + assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL); + assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL); + assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL); + assert(WebPFilters[WEBP_FILTER_HORIZONTAL] != NULL); + assert(WebPFilters[WEBP_FILTER_VERTICAL] != NULL); + assert(WebPFilters[WEBP_FILTER_GRADIENT] != NULL); + filters_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c index 8b03b1bd..83f553d9 100644 --- a/src/dsp/lossless.c +++ b/src/dsp/lossless.c @@ -15,6 +15,7 @@ #include "src/dsp/dsp.h" +#include #include #include #include "src/dec/vp8li_dec.h" @@ -606,15 +607,18 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) { COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd) COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C) +#if !WEBP_NEON_OMIT_C_CODE VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C; VP8LTransformColorInverse = VP8LTransformColorInverse_C; - VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C; VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C; + VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C; + VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C; +#endif + VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C; VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C; - VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C; VP8LMapColor32b = MapARGB_C; VP8LMapColor8b = MapAlpha_C; @@ -626,11 +630,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) { VP8LDspInitSSE2(); } #endif -#if defined(WEBP_USE_NEON) - if (VP8GetCPUInfo(kNEON)) { - VP8LDspInitNEON(); - } -#endif #if defined(WEBP_USE_MIPS_DSP_R2) if (VP8GetCPUInfo(kMIPSdspR2)) { VP8LDspInitMIPSdspR2(); @@ -642,6 +641,24 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) { } #endif } + +#if defined(WEBP_USE_NEON) + if (WEBP_NEON_OMIT_C_CODE || + (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { + VP8LDspInitNEON(); + } +#endif + + assert(VP8LAddGreenToBlueAndRed != NULL); + assert(VP8LTransformColorInverse != NULL); + assert(VP8LConvertBGRAToRGBA != NULL); + assert(VP8LConvertBGRAToRGB != NULL); + assert(VP8LConvertBGRAToBGR != NULL); + assert(VP8LConvertBGRAToRGBA4444 != NULL); + assert(VP8LConvertBGRAToRGB565 != NULL); + assert(VP8LMapColor32b != NULL); + assert(VP8LMapColor8b != NULL); + lossless_last_cpuinfo_used = VP8GetCPUInfo; } #undef COPY_PREDICTOR_ARRAY diff --git a/src/dsp/lossless_enc.c b/src/dsp/lossless_enc.c index b8a6baba..92ca3c05 100644 --- a/src/dsp/lossless_enc.c +++ b/src/dsp/lossless_enc.c @@ -15,6 +15,7 @@ #include "src/dsp/dsp.h" +#include #include #include #include "src/dec/vp8li_dec.h" @@ -870,9 +871,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) { VP8LDspInit(); +#if !WEBP_NEON_OMIT_C_CODE VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C; VP8LTransformColor = VP8LTransformColor_C; +#endif VP8LCollectColorBlueTransforms = VP8LCollectColorBlueTransforms_C; VP8LCollectColorRedTransforms = VP8LCollectColorRedTransforms_C; @@ -938,11 +941,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) { #endif } #endif -#if defined(WEBP_USE_NEON) - if (VP8GetCPUInfo(kNEON)) { - VP8LEncDspInitNEON(); - } -#endif #if defined(WEBP_USE_MIPS32) if (VP8GetCPUInfo(kMIPS32)) { VP8LEncDspInitMIPS32(); @@ -959,6 +957,61 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) { } #endif } + +#if defined(WEBP_USE_NEON) + if (WEBP_NEON_OMIT_C_CODE || + (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { + VP8LEncDspInitNEON(); + } +#endif + + assert(VP8LSubtractGreenFromBlueAndRed != NULL); + assert(VP8LTransformColor != NULL); + assert(VP8LCollectColorBlueTransforms != NULL); + assert(VP8LCollectColorRedTransforms != NULL); + assert(VP8LFastLog2Slow != NULL); + assert(VP8LFastSLog2Slow != NULL); + assert(VP8LExtraCost != NULL); + assert(VP8LExtraCostCombined != NULL); + assert(VP8LCombinedShannonEntropy != NULL); + assert(VP8LGetEntropyUnrefined != NULL); + assert(VP8LGetCombinedEntropyUnrefined != NULL); + assert(VP8LHistogramAdd != NULL); + assert(VP8LVectorMismatch != NULL); + assert(VP8LBundleColorMap != NULL); + assert(VP8LPredictorsSub[0] != NULL); + assert(VP8LPredictorsSub[1] != NULL); + assert(VP8LPredictorsSub[2] != NULL); + assert(VP8LPredictorsSub[3] != NULL); + assert(VP8LPredictorsSub[4] != NULL); + assert(VP8LPredictorsSub[5] != NULL); + assert(VP8LPredictorsSub[6] != NULL); + assert(VP8LPredictorsSub[7] != NULL); + assert(VP8LPredictorsSub[8] != NULL); + assert(VP8LPredictorsSub[9] != NULL); + assert(VP8LPredictorsSub[10] != NULL); + assert(VP8LPredictorsSub[11] != NULL); + assert(VP8LPredictorsSub[12] != NULL); + assert(VP8LPredictorsSub[13] != NULL); + assert(VP8LPredictorsSub[14] != NULL); + assert(VP8LPredictorsSub[15] != NULL); + assert(VP8LPredictorsSub_C[0] != NULL); + assert(VP8LPredictorsSub_C[1] != NULL); + assert(VP8LPredictorsSub_C[2] != NULL); + assert(VP8LPredictorsSub_C[3] != NULL); + assert(VP8LPredictorsSub_C[4] != NULL); + assert(VP8LPredictorsSub_C[5] != NULL); + assert(VP8LPredictorsSub_C[6] != NULL); + assert(VP8LPredictorsSub_C[7] != NULL); + assert(VP8LPredictorsSub_C[8] != NULL); + assert(VP8LPredictorsSub_C[9] != NULL); + assert(VP8LPredictorsSub_C[10] != NULL); + assert(VP8LPredictorsSub_C[11] != NULL); + assert(VP8LPredictorsSub_C[12] != NULL); + assert(VP8LPredictorsSub_C[13] != NULL); + assert(VP8LPredictorsSub_C[14] != NULL); + assert(VP8LPredictorsSub_C[15] != NULL); + lossless_enc_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/src/dsp/rescaler.c b/src/dsp/rescaler.c index 5d567339..dc61325b 100644 --- a/src/dsp/rescaler.c +++ b/src/dsp/rescaler.c @@ -210,10 +210,13 @@ static volatile VP8CPUInfo rescaler_last_cpuinfo_used = WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) { if (rescaler_last_cpuinfo_used == VP8GetCPUInfo) return; - WebPRescalerImportRowExpand = WebPRescalerImportRowExpand_C; - WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C; +#if !WEBP_NEON_OMIT_C_CODE WebPRescalerExportRowExpand = WebPRescalerExportRowExpand_C; WebPRescalerExportRowShrink = WebPRescalerExportRowShrink_C; +#endif + + WebPRescalerImportRowExpand = WebPRescalerImportRowExpand_C; + WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C; if (VP8GetCPUInfo != NULL) { #if defined(WEBP_USE_SSE2) @@ -221,11 +224,6 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) { WebPRescalerDspInitSSE2(); } #endif -#if defined(WEBP_USE_NEON) - if (VP8GetCPUInfo(kNEON)) { - WebPRescalerDspInitNEON(); - } -#endif #if defined(WEBP_USE_MIPS32) if (VP8GetCPUInfo(kMIPS32)) { WebPRescalerDspInitMIPS32(); @@ -242,5 +240,18 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) { } #endif } + +#if defined(WEBP_USE_NEON) + if (WEBP_NEON_OMIT_C_CODE || + (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { + WebPRescalerDspInitNEON(); + } +#endif + + assert(WebPRescalerExportRowExpand != NULL); + assert(WebPRescalerExportRowShrink != NULL); + assert(WebPRescalerImportRowExpand != NULL); + assert(WebPRescalerImportRowShrink != NULL); + rescaler_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/src/dsp/upsampling.c b/src/dsp/upsampling.c index 07dc54a7..a6cfc7fe 100644 --- a/src/dsp/upsampling.c +++ b/src/dsp/upsampling.c @@ -93,6 +93,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ } // All variants implemented. +#if !WEBP_NEON_OMIT_C_CODE UPSAMPLE_FUNC(UpsampleRgbLinePair_C, VP8YuvToRgb, 3) UPSAMPLE_FUNC(UpsampleBgrLinePair_C, VP8YuvToBgr, 3) UPSAMPLE_FUNC(UpsampleRgbaLinePair_C, VP8YuvToRgba, 4) @@ -100,6 +101,7 @@ UPSAMPLE_FUNC(UpsampleBgraLinePair_C, VP8YuvToBgra, 4) UPSAMPLE_FUNC(UpsampleArgbLinePair_C, VP8YuvToArgb, 4) UPSAMPLE_FUNC(UpsampleRgba4444LinePair_C, VP8YuvToRgba4444, 2) UPSAMPLE_FUNC(UpsampleRgb565LinePair_C, VP8YuvToRgb565, 2) +#endif #undef LOAD_UV #undef UPSAMPLE_FUNC @@ -223,6 +225,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) { if (upsampling_last_cpuinfo_used2 == VP8GetCPUInfo) return; #ifdef FANCY_UPSAMPLING +#if !WEBP_NEON_OMIT_C_CODE WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair_C; WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair_C; WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair_C; @@ -234,6 +237,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) { WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair_C; WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair_C; WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair_C; +#endif // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { @@ -242,11 +246,6 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) { WebPInitUpsamplersSSE2(); } #endif -#if defined(WEBP_USE_NEON) - if (VP8GetCPUInfo(kNEON)) { - WebPInitUpsamplersNEON(); - } -#endif #if defined(WEBP_USE_MIPS_DSP_R2) if (VP8GetCPUInfo(kMIPSdspR2)) { WebPInitUpsamplersMIPSdspR2(); @@ -258,6 +257,26 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) { } #endif } + +#if defined(WEBP_USE_NEON) + if (WEBP_NEON_OMIT_C_CODE || + (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { + WebPInitUpsamplersNEON(); + } +#endif + + assert(WebPUpsamplers[MODE_RGB] != NULL); + assert(WebPUpsamplers[MODE_RGBA] != NULL); + assert(WebPUpsamplers[MODE_BGR] != NULL); + assert(WebPUpsamplers[MODE_BGRA] != NULL); + assert(WebPUpsamplers[MODE_ARGB] != NULL); + assert(WebPUpsamplers[MODE_RGBA_4444] != NULL); + assert(WebPUpsamplers[MODE_RGB_565] != NULL); + assert(WebPUpsamplers[MODE_rgbA] != NULL); + assert(WebPUpsamplers[MODE_bgrA] != NULL); + assert(WebPUpsamplers[MODE_Argb] != NULL); + assert(WebPUpsamplers[MODE_rgbA_4444] != NULL); + #endif // FANCY_UPSAMPLING upsampling_last_cpuinfo_used2 = VP8GetCPUInfo; } diff --git a/src/dsp/yuv.c b/src/dsp/yuv.c index 437744cc..bddf81fe 100644 --- a/src/dsp/yuv.c +++ b/src/dsp/yuv.c @@ -13,6 +13,7 @@ #include "src/dsp/yuv.h" +#include #include //----------------------------------------------------------------------------- @@ -193,6 +194,7 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, //----------------------------------------------------------------------------- +#if !WEBP_NEON_OMIT_C_CODE #define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic static uint16_t clip_y(int v) { return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; @@ -230,6 +232,7 @@ static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len, out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); } } +#endif // !WEBP_NEON_OMIT_C_CODE #undef MAX_Y @@ -270,9 +273,11 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) { WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C; +#if !WEBP_NEON_OMIT_C_CODE WebPSharpYUVUpdateY = SharpYUVUpdateY_C; WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C; WebPSharpYUVFilterRow = SharpYUVFilterRow_C; +#endif if (VP8GetCPUInfo != NULL) { #if defined(WEBP_USE_SSE2) @@ -281,13 +286,24 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) { WebPInitSharpYUVSSE2(); } #endif // WEBP_USE_SSE2 + } + #if defined(WEBP_USE_NEON) - if (VP8GetCPUInfo(kNEON)) { - WebPInitConvertARGBToYUVNEON(); - WebPInitSharpYUVNEON(); - } + if (WEBP_NEON_OMIT_C_CODE || + (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { + WebPInitConvertARGBToYUVNEON(); + WebPInitSharpYUVNEON(); + } #endif // WEBP_USE_NEON - } + assert(WebPConvertARGBToY != NULL); + assert(WebPConvertARGBToUV != NULL); + assert(WebPConvertRGB24ToY != NULL); + assert(WebPConvertBGR24ToY != NULL); + assert(WebPConvertRGBA32ToUV != NULL); + assert(WebPSharpYUVUpdateY != NULL); + assert(WebPSharpYUVUpdateRGB != NULL); + assert(WebPSharpYUVFilterRow != NULL); + rgba_to_yuv_last_cpuinfo_used = VP8GetCPUInfo; }