diff --git a/src/dsp/alpha_processing.c b/src/dsp/alpha_processing.c
index 69604338..dfead26d 100644
--- a/src/dsp/alpha_processing.c
+++ b/src/dsp/alpha_processing.c
@@ -220,6 +220,7 @@ void WebPMultRows(uint8_t* ptr, int stride,
 #define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
 #endif
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
                                  int w, int h, int stride) {
   while (h-- > 0) {
@@ -238,6 +239,7 @@ static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
     rgba += stride;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 #undef MULTIPLIER
 #undef PREMULTIPLY
 
@@ -287,6 +289,7 @@ static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
 #endif
 }
 
+#if !WEBP_NEON_OMIT_C_CODE
 static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
                            int width, int height,
                            uint8_t* dst, int dst_stride) {
@@ -341,6 +344,7 @@ static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) {
   int i;
   for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 // Simple channel manipulations.
@@ -383,15 +387,16 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
 
   WebPMultARGBRow = WebPMultARGBRow_C;
   WebPMultRow = WebPMultRow_C;
-  WebPApplyAlphaMultiply = ApplyAlphaMultiply_C;
   WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b_C;
 
+  WebPPackRGB = PackRGB_C;
+#if !WEBP_NEON_OMIT_C_CODE
+  WebPApplyAlphaMultiply = ApplyAlphaMultiply_C;
   WebPDispatchAlpha = DispatchAlpha_C;
   WebPDispatchAlphaToGreen = DispatchAlphaToGreen_C;
   WebPExtractAlpha = ExtractAlpha_C;
   WebPExtractGreen = ExtractGreen_C;
-
-  WebPPackRGB = PackRGB_C;
+#endif
 
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {
@@ -405,16 +410,29 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
 #endif
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      WebPInitAlphaProcessingNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS_DSP_R2)
     if (VP8GetCPUInfo(kMIPSdspR2)) {
       WebPInitAlphaProcessingMIPSdspR2();
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    WebPInitAlphaProcessingNEON();
+  }
+#endif
+
+  assert(WebPMultARGBRow != NULL);
+  assert(WebPMultRow != NULL);
+  assert(WebPApplyAlphaMultiply != NULL);
+  assert(WebPApplyAlphaMultiply4444 != NULL);
+  assert(WebPDispatchAlpha != NULL);
+  assert(WebPDispatchAlphaToGreen != NULL);
+  assert(WebPExtractAlpha != NULL);
+  assert(WebPExtractGreen != NULL);
+  assert(WebPPackRGB != NULL);
+
   alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
 }
diff --git a/src/dsp/dec.c b/src/dsp/dec.c
index 13e852bf..1036d129 100644
--- a/src/dsp/dec.c
+++ b/src/dsp/dec.c
@@ -11,6 +11,8 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
+#include <assert.h>
+
 #include "src/dsp/dsp.h"
 #include "src/dec/vp8i_dec.h"
 #include "src/utils/utils.h"
@@ -38,6 +40,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
 #define MUL1(a) ((((a) * 20091) >> 16) + (a))
 #define MUL2(a) (((a) * 35468) >> 16)
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void TransformOne(const int16_t* in, uint8_t* dst) {
   int C[4 * 4], *tmp;
   int i;
@@ -99,12 +102,14 @@ static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
     TransformOne(in + 16, dst + 4);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 static void TransformUV(const int16_t* in, uint8_t* dst) {
   VP8Transform(in + 0 * 16, dst, 1);
   VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
 }
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void TransformDC(const int16_t* in, uint8_t* dst) {
   const int DC = in[0] + 4;
   int i, j;
@@ -114,6 +119,7 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
     }
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 static void TransformDCUV(const int16_t* in, uint8_t* dst) {
   if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
@@ -127,6 +133,7 @@ static void TransformDCUV(const int16_t* in, uint8_t* dst) {
 //------------------------------------------------------------------------------
 // Paragraph 14.3
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void TransformWHT(const int16_t* in, int16_t* out) {
   int tmp[16];
   int i;
@@ -153,6 +160,7 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
     out += 64;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
 
@@ -161,6 +169,7 @@ void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
 
 #define DST(x, y) dst[(x) + (y) * BPS]
 
+#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
   const uint8_t* top = dst - BPS;
   const uint8_t* const clip0 = VP8kclip1 - top[-1];
@@ -233,6 +242,7 @@ static void DC16NoLeft(uint8_t* dst) {  // DC with left samples not available
 static void DC16NoTopLeft(uint8_t* dst) {  // DC with no top and left samples
   Put16(0x80, dst);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
 
@@ -242,6 +252,7 @@ VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
 #define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
 #define AVG2(a, b) (((a) + (b) + 1) >> 1)
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void VE4(uint8_t* dst) {    // vertical
   const uint8_t* top = dst - BPS;
   const uint8_t vals[4] = {
@@ -255,6 +266,7 @@ static void VE4(uint8_t* dst) {    // vertical
     memcpy(dst + i * BPS, vals, sizeof(vals));
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 static void HE4(uint8_t* dst) {    // horizontal
   const int A = dst[-1 - BPS];
@@ -268,6 +280,7 @@ static void HE4(uint8_t* dst) {    // horizontal
   WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E));
 }
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void DC4(uint8_t* dst) {   // DC
   uint32_t dc = 4;
   int i;
@@ -312,6 +325,7 @@ static void LD4(uint8_t* dst) {   // Down-Left
                           DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
                                       DST(3, 3) = AVG3(G, H, H);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 static void VR4(uint8_t* dst) {   // Vertical-Right
   const int I = dst[-1 + 0 * BPS];
@@ -404,6 +418,7 @@ VP8PredFunc VP8PredLuma4[NUM_BMODES];
 //------------------------------------------------------------------------------
 // Chroma
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void VE8uv(uint8_t* dst) {    // vertical
   int j;
   for (j = 0; j < 8; ++j) {
@@ -457,12 +472,14 @@ static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
 static void DC8uvNoTopLeft(uint8_t* dst) {    // DC with nothing
   Put8x8uv(0x80, dst);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
 
 //------------------------------------------------------------------------------
 // Edge filtering functions
 
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 // 4 pixels in, 2 pixels out
 static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
   const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
@@ -507,12 +524,16 @@ static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
   const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
   return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
+#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
   const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
   return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 static WEBP_INLINE int needs_filter2(const uint8_t* p,
                                      int step, int t, int it) {
   const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
@@ -523,10 +544,12 @@ static WEBP_INLINE int needs_filter2(const uint8_t* p,
          VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
          VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
 //------------------------------------------------------------------------------
 // Simple In-loop filtering (Paragraph 15.2)
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
   int i;
   const int thresh2 = 2 * thresh + 1;
@@ -562,10 +585,12 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
     SimpleHFilter16(p, stride, thresh);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 // Complex In-loop filtering (Paragraph 15.3)
 
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 static WEBP_INLINE void FilterLoop26(uint8_t* p,
                                      int hstride, int vstride, int size,
                                      int thresh, int ithresh, int hev_thresh) {
@@ -597,7 +622,9 @@ static WEBP_INLINE void FilterLoop24(uint8_t* p,
     p += vstride;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
+#if !WEBP_NEON_OMIT_C_CODE
 // on macroblock edges
 static void VFilter16(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
@@ -618,7 +645,9 @@ static void VFilter16i(uint8_t* p, int stride,
     FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 static void HFilter16i(uint8_t* p, int stride,
                        int thresh, int ithresh, int hev_thresh) {
   int k;
@@ -627,31 +656,40 @@ static void HFilter16i(uint8_t* p, int stride,
     FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
+#if !WEBP_NEON_OMIT_C_CODE
 // 8-pixels wide variant, for chroma filtering
 static void VFilter8(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
   FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
   FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 static void HFilter8(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
   FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
   FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
                       int thresh, int ithresh, int hev_thresh) {
   FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
   FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
                       int thresh, int ithresh, int hev_thresh) {
   FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
   FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
 //------------------------------------------------------------------------------
 
@@ -709,37 +747,48 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
 
   VP8InitClipTables();
 
+#if !WEBP_NEON_OMIT_C_CODE
   VP8TransformWHT = TransformWHT;
   VP8Transform = TransformTwo;
-  VP8TransformUV = TransformUV;
   VP8TransformDC = TransformDC;
-  VP8TransformDCUV = TransformDCUV;
   VP8TransformAC3 = TransformAC3;
+#endif
+  VP8TransformUV = TransformUV;
+  VP8TransformDCUV = TransformDCUV;
 
+#if !WEBP_NEON_OMIT_C_CODE
   VP8VFilter16 = VFilter16;
+  VP8VFilter16i = VFilter16i;
   VP8HFilter16 = HFilter16;
   VP8VFilter8 = VFilter8;
-  VP8HFilter8 = HFilter8;
-  VP8VFilter16i = VFilter16i;
-  VP8HFilter16i = HFilter16i;
   VP8VFilter8i = VFilter8i;
-  VP8HFilter8i = HFilter8i;
   VP8SimpleVFilter16 = SimpleVFilter16;
   VP8SimpleHFilter16 = SimpleHFilter16;
   VP8SimpleVFilter16i = SimpleVFilter16i;
   VP8SimpleHFilter16i = SimpleHFilter16i;
+#endif
 
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+  VP8HFilter16i = HFilter16i;
+  VP8HFilter8 = HFilter8;
+  VP8HFilter8i = HFilter8i;
+#endif
+
+#if !WEBP_NEON_OMIT_C_CODE
   VP8PredLuma4[0] = DC4;
   VP8PredLuma4[1] = TM4;
   VP8PredLuma4[2] = VE4;
-  VP8PredLuma4[3] = HE4;
   VP8PredLuma4[4] = RD4;
-  VP8PredLuma4[5] = VR4;
   VP8PredLuma4[6] = LD4;
+#endif
+
+  VP8PredLuma4[3] = HE4;
+  VP8PredLuma4[5] = VR4;
   VP8PredLuma4[7] = VL4;
   VP8PredLuma4[8] = HD4;
   VP8PredLuma4[9] = HU4;
 
+#if !WEBP_NEON_OMIT_C_CODE
   VP8PredLuma16[0] = DC16;
   VP8PredLuma16[1] = TM16;
   VP8PredLuma16[2] = VE16;
@@ -755,6 +804,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
   VP8PredChroma8[4] = DC8uvNoTop;
   VP8PredChroma8[5] = DC8uvNoLeft;
   VP8PredChroma8[6] = DC8uvNoTopLeft;
+#endif
 
   VP8DitherCombine8x8 = DitherCombine8x8;
 
@@ -770,11 +820,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
 #endif
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      VP8DspInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS32)
     if (VP8GetCPUInfo(kMIPS32)) {
       VP8DspInitMIPS32();
@@ -791,5 +836,57 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    VP8DspInitNEON();
+  }
+#endif
+
+  assert(VP8TransformWHT != NULL);
+  assert(VP8Transform != NULL);
+  assert(VP8TransformDC != NULL);
+  assert(VP8TransformAC3 != NULL);
+  assert(VP8TransformUV != NULL);
+  assert(VP8TransformDCUV != NULL);
+  assert(VP8VFilter16 != NULL);
+  assert(VP8HFilter16 != NULL);
+  assert(VP8VFilter8 != NULL);
+  assert(VP8HFilter8 != NULL);
+  assert(VP8VFilter16i != NULL);
+  assert(VP8HFilter16i != NULL);
+  assert(VP8VFilter8i != NULL);
+  assert(VP8HFilter8i != NULL);
+  assert(VP8SimpleVFilter16 != NULL);
+  assert(VP8SimpleHFilter16 != NULL);
+  assert(VP8SimpleVFilter16i != NULL);
+  assert(VP8SimpleHFilter16i != NULL);
+  assert(VP8PredLuma4[0] != NULL);
+  assert(VP8PredLuma4[1] != NULL);
+  assert(VP8PredLuma4[2] != NULL);
+  assert(VP8PredLuma4[3] != NULL);
+  assert(VP8PredLuma4[4] != NULL);
+  assert(VP8PredLuma4[5] != NULL);
+  assert(VP8PredLuma4[6] != NULL);
+  assert(VP8PredLuma4[7] != NULL);
+  assert(VP8PredLuma4[8] != NULL);
+  assert(VP8PredLuma4[9] != NULL);
+  assert(VP8PredLuma16[0] != NULL);
+  assert(VP8PredLuma16[1] != NULL);
+  assert(VP8PredLuma16[2] != NULL);
+  assert(VP8PredLuma16[3] != NULL);
+  assert(VP8PredLuma16[4] != NULL);
+  assert(VP8PredLuma16[5] != NULL);
+  assert(VP8PredLuma16[6] != NULL);
+  assert(VP8PredChroma8[0] != NULL);
+  assert(VP8PredChroma8[1] != NULL);
+  assert(VP8PredChroma8[2] != NULL);
+  assert(VP8PredChroma8[3] != NULL);
+  assert(VP8PredChroma8[4] != NULL);
+  assert(VP8PredChroma8[5] != NULL);
+  assert(VP8PredChroma8[6] != NULL);
+  assert(VP8DitherCombine8x8 != NULL);
+
   dec_last_cpuinfo_used = VP8GetCPUInfo;
 }
diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h
index 25efc18f..d2001121 100644
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -116,6 +116,32 @@ extern "C" {
 
 #endif  /* EMSCRIPTEN */
 
+// Set to 0 at build time to keep every plain-C implementation compiled in.
+#ifndef WEBP_DSP_OMIT_C_CODE
+#define WEBP_DSP_OMIT_C_CODE 1
+#endif
+
+// The C fallbacks may only be dropped when the NEON code is actually built
+// (WEBP_USE_NEON) and the target always has NEON (__aarch64__/__ARM_NEON__):
+// the *Init() functions install the NEON versions only under WEBP_USE_NEON,
+// so omitting the C code without it would leave function pointers NULL.
+// NOTE(review): relies on WEBP_USE_NEON being defined above this point.
+#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE && \
+    (defined(__aarch64__) || defined(__ARM_NEON__))
+#define WEBP_NEON_OMIT_C_CODE 1
+#else
+#define WEBP_NEON_OMIT_C_CODE 0
+#endif
+
+// Some C functions are kept even when WEBP_NEON_OMIT_C_CODE is set if none
+// of the conditions below holds.
+// NOTE(review): presumably their NEON versions are disabled there -- confirm.
+#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
+#define WEBP_NEON_WORK_AROUND_GCC 1
+#else
+#define WEBP_NEON_WORK_AROUND_GCC 0
+#endif
+
 // This macro prevents thread_sanitizer from reporting known concurrent writes.
 #define WEBP_TSAN_IGNORE_FUNCTION
 #if defined(__has_feature)
diff --git a/src/dsp/enc.c b/src/dsp/enc.c
index bf59d5b8..1c807f1d 100644
--- a/src/dsp/enc.c
+++ b/src/dsp/enc.c
@@ -21,9 +21,11 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
   return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
 }
 
+#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE int clip_max(int v, int max) {
   return (v > max) ? max : v;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 // Compute susceptibility based on DCT-coeff histograms:
@@ -56,6 +58,7 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
   histo->last_non_zero = last_non_zero;
 }
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
                                int start_block, int end_block,
                                VP8Histogram* const histo) {
@@ -76,6 +79,7 @@ static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
   }
   VP8SetHistogramData(distribution, histo);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 // run-time tables (~4k)
@@ -100,6 +104,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
 
+#if !WEBP_NEON_OMIT_C_CODE
+
 #define STORE(x, y, v) \
   dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
 
@@ -176,6 +182,7 @@ static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
     out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
                           int16_t* out) {
@@ -183,6 +190,7 @@ static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
   VP8FTransform(src + 4, ref + 4, out + 16);
 }
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void FTransformWHT_C(const int16_t* in, int16_t* out) {
   // input is 12b signed
   int32_t tmp[16];
@@ -212,6 +220,7 @@ static void FTransformWHT_C(const int16_t* in, int16_t* out) {
     out[12 + i] = b3 >> 1;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 #undef MUL
 #undef STORE
@@ -524,6 +533,7 @@ static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
 //------------------------------------------------------------------------------
 // Metric
 
+#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
                               int w, int h) {
   int count = 0;
@@ -551,6 +561,7 @@ static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
 static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
   return GetSSE(a, b, 4, 4);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
   int k, x, y;
@@ -572,6 +583,7 @@ static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
 // We try to match the spectral content (weighted) between source and
 // reconstructed samples.
 
+#if !WEBP_NEON_OMIT_C_CODE
 // Hadamard transform
 // Returns the weighted sum of the absolute value of transformed coefficients.
 // w[] contains a row-major 4 by 4 symmetric matrix.
@@ -627,6 +639,7 @@ static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
   }
   return D;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 // Quantization
@@ -663,6 +676,7 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
   return (last >= 0);
 }
 
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
                              const VP8Matrix* const mtx) {
   int nz;
@@ -670,6 +684,7 @@ static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
   nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
   return nz;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 
 //------------------------------------------------------------------------------
 // Block copy
@@ -735,23 +750,29 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
   InitTables();
 
   // default C implementations
-  VP8CollectHistogram = CollectHistogram_C;
+#if !WEBP_NEON_OMIT_C_CODE
   VP8ITransform = ITransform_C;
   VP8FTransform = FTransform_C;
-  VP8FTransform2 = FTransform2_C;
   VP8FTransformWHT = FTransformWHT_C;
+  VP8TDisto4x4 = Disto4x4_C;
+  VP8TDisto16x16 = Disto16x16_C;
+  VP8CollectHistogram = CollectHistogram_C;
+  VP8SSE16x16 = SSE16x16_C;
+  VP8SSE16x8 = SSE16x8_C;
+  VP8SSE8x8 = SSE8x8_C;
+  VP8SSE4x4 = SSE4x4_C;
+#endif
+
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+  VP8EncQuantizeBlock = QuantizeBlock_C;
+  VP8EncQuantize2Blocks = Quantize2Blocks_C;
+#endif
+
+  VP8FTransform2 = FTransform2_C;
   VP8EncPredLuma4 = Intra4Preds_C;
   VP8EncPredLuma16 = Intra16Preds_C;
   VP8EncPredChroma8 = IntraChromaPreds_C;
-  VP8SSE16x16 = SSE16x16_C;
-  VP8SSE8x8 = SSE8x8_C;
-  VP8SSE16x8 = SSE16x8_C;
-  VP8SSE4x4 = SSE4x4_C;
-  VP8TDisto4x4 = Disto4x4_C;
-  VP8TDisto16x16 = Disto16x16_C;
   VP8Mean16x4 = Mean16x4_C;
-  VP8EncQuantizeBlock = QuantizeBlock_C;
-  VP8EncQuantize2Blocks = Quantize2Blocks_C;
   VP8EncQuantizeBlockWHT = QuantizeBlock_C;
   VP8Copy4x4 = Copy4x4_C;
   VP8Copy16x8 = Copy16x8_C;
@@ -773,11 +794,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
       VP8EncDspInitAVX2();
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      VP8EncDspInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS32)
     if (VP8GetCPUInfo(kMIPS32)) {
       VP8EncDspInitMIPS32();
@@ -794,5 +810,34 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    VP8EncDspInitNEON();
+  }
+#endif
+
+  assert(VP8ITransform != NULL);
+  assert(VP8FTransform != NULL);
+  assert(VP8FTransformWHT != NULL);
+  assert(VP8TDisto4x4 != NULL);
+  assert(VP8TDisto16x16 != NULL);
+  assert(VP8CollectHistogram != NULL);
+  assert(VP8SSE16x16 != NULL);
+  assert(VP8SSE16x8 != NULL);
+  assert(VP8SSE8x8 != NULL);
+  assert(VP8SSE4x4 != NULL);
+  assert(VP8EncQuantizeBlock != NULL);
+  assert(VP8EncQuantize2Blocks != NULL);
+  assert(VP8FTransform2 != NULL);
+  assert(VP8EncPredLuma4 != NULL);
+  assert(VP8EncPredLuma16 != NULL);
+  assert(VP8EncPredChroma8 != NULL);
+  assert(VP8Mean16x4 != NULL);
+  assert(VP8EncQuantizeBlockWHT != NULL);
+  assert(VP8Copy4x4 != NULL);
+  assert(VP8Copy16x8 != NULL);
+
   enc_last_cpuinfo_used = VP8GetCPUInfo;
 }
diff --git a/src/dsp/filters.c b/src/dsp/filters.c
index 7759126f..ca5f877d 100644
--- a/src/dsp/filters.c
+++ b/src/dsp/filters.c
@@ -28,6 +28,7 @@
   assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
   (void)height;  // Silence unused warning.
 
+#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
                                       uint8_t* dst, int length, int inverse) {
   int i;
@@ -112,6 +113,7 @@ static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
     out += stride;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 // Gradient filter.
@@ -121,6 +123,7 @@ static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
   return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255;  // clip to 8bit
 }
 
+#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
                                            int width, int height, int stride,
                                            int row, int num_rows,
@@ -160,11 +163,13 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
     out += stride;
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 #undef SANITY_CHECK
 
 //------------------------------------------------------------------------------
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void HorizontalFilter_C(const uint8_t* data, int width, int height,
                                int stride, uint8_t* filtered_data) {
   DoHorizontalFilter_C(data, width, height, stride, 0, height, 0,
@@ -180,7 +185,7 @@ static void GradientFilter_C(const uint8_t* data, int width, int height,
                              int stride, uint8_t* filtered_data) {
   DoGradientFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
 }
-
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 //------------------------------------------------------------------------------
 
@@ -194,6 +199,7 @@ static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
   }
 }
 
+#if !WEBP_NEON_OMIT_C_CODE
 static void VerticalUnfilter_C(const uint8_t* prev, const uint8_t* in,
                                uint8_t* out, int width) {
   if (prev == NULL) {
@@ -203,6 +209,7 @@ static void VerticalUnfilter_C(const uint8_t* prev, const uint8_t* in,
     for (i = 0; i < width; ++i) out[i] = prev[i] + in[i];
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in,
                                uint8_t* out, int width) {
@@ -238,14 +245,18 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
   if (filters_last_cpuinfo_used == VP8GetCPUInfo) return;
 
   WebPUnfilters[WEBP_FILTER_NONE] = NULL;
+#if !WEBP_NEON_OMIT_C_CODE
   WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C;
   WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C;
+#endif
   WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_C;
 
   WebPFilters[WEBP_FILTER_NONE] = NULL;
+#if !WEBP_NEON_OMIT_C_CODE
   WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_C;
   WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_C;
   WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_C;
+#endif
 
   if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_USE_SSE2)
@@ -253,11 +264,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
       VP8FiltersInitSSE2();
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      VP8FiltersInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS_DSP_R2)
     if (VP8GetCPUInfo(kMIPSdspR2)) {
       VP8FiltersInitMIPSdspR2();
@@ -269,5 +275,20 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    VP8FiltersInitNEON();
+  }
+#endif
+
+  assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL);
+  assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL);
+  assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL);
+  assert(WebPFilters[WEBP_FILTER_HORIZONTAL] != NULL);
+  assert(WebPFilters[WEBP_FILTER_VERTICAL] != NULL);
+  assert(WebPFilters[WEBP_FILTER_GRADIENT] != NULL);
+
   filters_last_cpuinfo_used = VP8GetCPUInfo;
 }
diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
index 8b03b1bd..83f553d9 100644
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -15,6 +15,7 @@
 
 #include "src/dsp/dsp.h"
 
+#include <assert.h>
 #include <math.h>
 #include <stdlib.h>
 #include "src/dec/vp8li_dec.h"
@@ -606,15 +607,18 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
 
+#if !WEBP_NEON_OMIT_C_CODE
   VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
 
   VP8LTransformColorInverse = VP8LTransformColorInverse_C;
 
-  VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
   VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
+  VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
+  VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
+#endif
+
   VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
-  VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
 
   VP8LMapColor32b = MapARGB_C;
   VP8LMapColor8b = MapAlpha_C;
@@ -626,11 +630,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
       VP8LDspInitSSE2();
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      VP8LDspInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS_DSP_R2)
     if (VP8GetCPUInfo(kMIPSdspR2)) {
       VP8LDspInitMIPSdspR2();
@@ -642,6 +641,24 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    VP8LDspInitNEON();
+  }
+#endif
+
+  assert(VP8LAddGreenToBlueAndRed != NULL);
+  assert(VP8LTransformColorInverse != NULL);
+  assert(VP8LConvertBGRAToRGBA != NULL);
+  assert(VP8LConvertBGRAToRGB != NULL);
+  assert(VP8LConvertBGRAToBGR != NULL);
+  assert(VP8LConvertBGRAToRGBA4444 != NULL);
+  assert(VP8LConvertBGRAToRGB565 != NULL);
+  assert(VP8LMapColor32b != NULL);
+  assert(VP8LMapColor8b != NULL);
+
   lossless_last_cpuinfo_used = VP8GetCPUInfo;
 }
 #undef COPY_PREDICTOR_ARRAY
diff --git a/src/dsp/lossless_enc.c b/src/dsp/lossless_enc.c
index b8a6baba..92ca3c05 100644
--- a/src/dsp/lossless_enc.c
+++ b/src/dsp/lossless_enc.c
@@ -15,6 +15,7 @@
 
 #include "src/dsp/dsp.h"
 
+#include <assert.h>
 #include <math.h>
 #include <stdlib.h>
 #include "src/dec/vp8li_dec.h"
@@ -870,9 +871,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
 
   VP8LDspInit();
 
+#if !WEBP_NEON_OMIT_C_CODE
   VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
 
   VP8LTransformColor = VP8LTransformColor_C;
+#endif
 
   VP8LCollectColorBlueTransforms = VP8LCollectColorBlueTransforms_C;
   VP8LCollectColorRedTransforms = VP8LCollectColorRedTransforms_C;
@@ -938,11 +941,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
 #endif
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      VP8LEncDspInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS32)
     if (VP8GetCPUInfo(kMIPS32)) {
       VP8LEncDspInitMIPS32();
@@ -959,6 +957,61 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    VP8LEncDspInitNEON();
+  }
+#endif
+
+  assert(VP8LSubtractGreenFromBlueAndRed != NULL);
+  assert(VP8LTransformColor != NULL);
+  assert(VP8LCollectColorBlueTransforms != NULL);
+  assert(VP8LCollectColorRedTransforms != NULL);
+  assert(VP8LFastLog2Slow != NULL);
+  assert(VP8LFastSLog2Slow != NULL);
+  assert(VP8LExtraCost != NULL);
+  assert(VP8LExtraCostCombined != NULL);
+  assert(VP8LCombinedShannonEntropy != NULL);
+  assert(VP8LGetEntropyUnrefined != NULL);
+  assert(VP8LGetCombinedEntropyUnrefined != NULL);
+  assert(VP8LHistogramAdd != NULL);
+  assert(VP8LVectorMismatch != NULL);
+  assert(VP8LBundleColorMap != NULL);
+  assert(VP8LPredictorsSub[0] != NULL);
+  assert(VP8LPredictorsSub[1] != NULL);
+  assert(VP8LPredictorsSub[2] != NULL);
+  assert(VP8LPredictorsSub[3] != NULL);
+  assert(VP8LPredictorsSub[4] != NULL);
+  assert(VP8LPredictorsSub[5] != NULL);
+  assert(VP8LPredictorsSub[6] != NULL);
+  assert(VP8LPredictorsSub[7] != NULL);
+  assert(VP8LPredictorsSub[8] != NULL);
+  assert(VP8LPredictorsSub[9] != NULL);
+  assert(VP8LPredictorsSub[10] != NULL);
+  assert(VP8LPredictorsSub[11] != NULL);
+  assert(VP8LPredictorsSub[12] != NULL);
+  assert(VP8LPredictorsSub[13] != NULL);
+  assert(VP8LPredictorsSub[14] != NULL);
+  assert(VP8LPredictorsSub[15] != NULL);
+  assert(VP8LPredictorsSub_C[0] != NULL);
+  assert(VP8LPredictorsSub_C[1] != NULL);
+  assert(VP8LPredictorsSub_C[2] != NULL);
+  assert(VP8LPredictorsSub_C[3] != NULL);
+  assert(VP8LPredictorsSub_C[4] != NULL);
+  assert(VP8LPredictorsSub_C[5] != NULL);
+  assert(VP8LPredictorsSub_C[6] != NULL);
+  assert(VP8LPredictorsSub_C[7] != NULL);
+  assert(VP8LPredictorsSub_C[8] != NULL);
+  assert(VP8LPredictorsSub_C[9] != NULL);
+  assert(VP8LPredictorsSub_C[10] != NULL);
+  assert(VP8LPredictorsSub_C[11] != NULL);
+  assert(VP8LPredictorsSub_C[12] != NULL);
+  assert(VP8LPredictorsSub_C[13] != NULL);
+  assert(VP8LPredictorsSub_C[14] != NULL);
+  assert(VP8LPredictorsSub_C[15] != NULL);
+
   lossless_enc_last_cpuinfo_used = VP8GetCPUInfo;
 }
 
diff --git a/src/dsp/rescaler.c b/src/dsp/rescaler.c
index 5d567339..dc61325b 100644
--- a/src/dsp/rescaler.c
+++ b/src/dsp/rescaler.c
@@ -210,10 +210,13 @@ static volatile VP8CPUInfo rescaler_last_cpuinfo_used =
 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {
   if (rescaler_last_cpuinfo_used == VP8GetCPUInfo) return;
 
-  WebPRescalerImportRowExpand = WebPRescalerImportRowExpand_C;
-  WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C;
+#if !WEBP_NEON_OMIT_C_CODE
   WebPRescalerExportRowExpand = WebPRescalerExportRowExpand_C;
   WebPRescalerExportRowShrink = WebPRescalerExportRowShrink_C;
+#endif
+
+  WebPRescalerImportRowExpand = WebPRescalerImportRowExpand_C;
+  WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C;
 
   if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_USE_SSE2)
@@ -221,11 +224,6 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {
       WebPRescalerDspInitSSE2();
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      WebPRescalerDspInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS32)
     if (VP8GetCPUInfo(kMIPS32)) {
       WebPRescalerDspInitMIPS32();
@@ -242,5 +240,18 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    WebPRescalerDspInitNEON();
+  }
+#endif
+
+  assert(WebPRescalerExportRowExpand != NULL);
+  assert(WebPRescalerExportRowShrink != NULL);
+  assert(WebPRescalerImportRowExpand != NULL);
+  assert(WebPRescalerImportRowShrink != NULL);
+
   rescaler_last_cpuinfo_used = VP8GetCPUInfo;
 }
diff --git a/src/dsp/upsampling.c b/src/dsp/upsampling.c
index 07dc54a7..a6cfc7fe 100644
--- a/src/dsp/upsampling.c
+++ b/src/dsp/upsampling.c
@@ -93,6 +93,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
 }
 
 // All variants implemented.
+#if !WEBP_NEON_OMIT_C_CODE
 UPSAMPLE_FUNC(UpsampleRgbLinePair_C,  VP8YuvToRgb,  3)
 UPSAMPLE_FUNC(UpsampleBgrLinePair_C,  VP8YuvToBgr,  3)
 UPSAMPLE_FUNC(UpsampleRgbaLinePair_C, VP8YuvToRgba, 4)
@@ -100,6 +101,7 @@ UPSAMPLE_FUNC(UpsampleBgraLinePair_C, VP8YuvToBgra, 4)
 UPSAMPLE_FUNC(UpsampleArgbLinePair_C, VP8YuvToArgb, 4)
 UPSAMPLE_FUNC(UpsampleRgba4444LinePair_C, VP8YuvToRgba4444, 2)
 UPSAMPLE_FUNC(UpsampleRgb565LinePair_C,  VP8YuvToRgb565,  2)
+#endif
 
 #undef LOAD_UV
 #undef UPSAMPLE_FUNC
@@ -223,6 +225,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
   if (upsampling_last_cpuinfo_used2 == VP8GetCPUInfo) return;
 
 #ifdef FANCY_UPSAMPLING
+#if !WEBP_NEON_OMIT_C_CODE
   WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair_C;
   WebPUpsamplers[MODE_RGBA]      = UpsampleRgbaLinePair_C;
   WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair_C;
@@ -234,6 +237,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
   WebPUpsamplers[MODE_bgrA]      = UpsampleBgraLinePair_C;
   WebPUpsamplers[MODE_Argb]      = UpsampleArgbLinePair_C;
   WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair_C;
+#endif
 
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {
@@ -242,11 +246,6 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
       WebPInitUpsamplersSSE2();
     }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      WebPInitUpsamplersNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS_DSP_R2)
     if (VP8GetCPUInfo(kMIPSdspR2)) {
       WebPInitUpsamplersMIPSdspR2();
@@ -258,6 +257,26 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
     }
 #endif
   }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    WebPInitUpsamplersNEON();
+  }
+#endif
+
+  assert(WebPUpsamplers[MODE_RGB] != NULL);
+  assert(WebPUpsamplers[MODE_RGBA] != NULL);
+  assert(WebPUpsamplers[MODE_BGR] != NULL);
+  assert(WebPUpsamplers[MODE_BGRA] != NULL);
+  assert(WebPUpsamplers[MODE_ARGB] != NULL);
+  assert(WebPUpsamplers[MODE_RGBA_4444] != NULL);
+  assert(WebPUpsamplers[MODE_RGB_565] != NULL);
+  assert(WebPUpsamplers[MODE_rgbA] != NULL);
+  assert(WebPUpsamplers[MODE_bgrA] != NULL);
+  assert(WebPUpsamplers[MODE_Argb] != NULL);
+  assert(WebPUpsamplers[MODE_rgbA_4444] != NULL);
+
 #endif  // FANCY_UPSAMPLING
   upsampling_last_cpuinfo_used2 = VP8GetCPUInfo;
 }
diff --git a/src/dsp/yuv.c b/src/dsp/yuv.c
index 437744cc..bddf81fe 100644
--- a/src/dsp/yuv.c
+++ b/src/dsp/yuv.c
@@ -13,6 +13,7 @@
 
 #include "src/dsp/yuv.h"
 
+#include <assert.h>
 #include <stdlib.h>
 
 //-----------------------------------------------------------------------------
@@ -193,6 +194,7 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
 
 //-----------------------------------------------------------------------------
 
+#if !WEBP_NEON_OMIT_C_CODE
 #define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
 static uint16_t clip_y(int v) {
   return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
@@ -230,6 +232,7 @@ static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
     out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
   }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE
 
 #undef MAX_Y
 
@@ -270,9 +273,11 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) {
 
   WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
 
+#if !WEBP_NEON_OMIT_C_CODE
   WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
   WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
   WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
+#endif
 
   if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_USE_SSE2)
@@ -281,13 +286,24 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) {
       WebPInitSharpYUVSSE2();
     }
 #endif  // WEBP_USE_SSE2
+  }
+
 #if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      WebPInitConvertARGBToYUVNEON();
-      WebPInitSharpYUVNEON();
-    }
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    WebPInitConvertARGBToYUVNEON();
+    WebPInitSharpYUVNEON();
+  }
 #endif  // WEBP_USE_NEON
 
-  }
+  assert(WebPConvertARGBToY != NULL);
+  assert(WebPConvertARGBToUV != NULL);
+  assert(WebPConvertRGB24ToY != NULL);
+  assert(WebPConvertBGR24ToY != NULL);
+  assert(WebPConvertRGBA32ToUV != NULL);
+  assert(WebPSharpYUVUpdateY != NULL);
+  assert(WebPSharpYUVUpdateRGB != NULL);
+  assert(WebPSharpYUVFilterRow != NULL);
+
   rgba_to_yuv_last_cpuinfo_used = VP8GetCPUInfo;
 }