unify the ALTERNATE_CODE flag usage

Pattern is now: #if !defined(FLAG) #define FLAG 0 // ALTERNATE_CODE #endif ... #if (FLAG == 1) ... #else ... #endif // FLAG ... Removed some unused code / flags: WEBP_YUV_USE_TABLE, WEBP_REFERENCE_IMPLEMENTATION, experimental code, VP8YUVInit(), ... BUG=webp:355 Change-Id: I98deb9189446a4cfd665c13ea8aa1ce6a308c63f
2017-07-31 18:53:29 -07:00
parent c4568b47fd
commit 663a6d9d2e
19 changed files with 70 additions and 163 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -35,7 +35,7 @@ string(REGEX MATCH "[0-9.]+" WEBP_VERSION ${SOURCE_FILE})
 ################################################################################
 # Options.
 if(WEBP_ENABLE_SWAP_16BIT_CSP)
-  add_definitions(-DWEBP_SWAP_16BIT_CSP)
+  add_definitions(-DWEBP_SWAP_16BIT_CSP=1)
 endif()

 ################################################################################
--- a/configure.ac
+++ b/configure.ac
@@ -659,7 +659,7 @@ if test "$enable_wic" = "yes"; then
 fi
 esac

-dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP
+dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP=1

 USE_SWAP_16BIT_CSP=""
 AC_MSG_CHECKING(if --enable-swap-16bit-csp option is specified)
@@ -667,7 +667,7 @@ AC_ARG_ENABLE([swap-16bit-csp],
              AS_HELP_STRING([--enable-swap-16bit-csp],
                             [Enable byte swap for 16 bit colorspaces]))
 if test "$enable_swap_16bit_csp" = "yes"; then
-  USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP"
+  USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP=1"
 fi
 AC_MSG_RESULT(${enable_swap_16bit_csp-no})
 AC_SUBST(USE_SWAP_16BIT_CSP)
--- a/makefile.unix
+++ b/makefile.unix
@@ -57,7 +57,7 @@ endif
 # EXTRA_FLAGS += -DWEBP_EXPERIMENTAL_FEATURES

 # Extra flags to enable byte swap for 16 bit colorspaces.
-# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP
+# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP=1

 # Extra flags to enable multi-threading
 EXTRA_FLAGS += -DWEBP_USE_THREAD
--- a/src/dec/io_dec.c
+++ b/src/dec/io_dec.c
@@ -212,7 +212,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
    int num_rows;
    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
    uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    uint8_t* alpha_dst = base_rgba;
 #else
    uint8_t* alpha_dst = base_rgba + 1;
@@ -431,7 +431,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
                               int max_lines_out) {
  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
  uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
  uint8_t* alpha_dst = base_rgba;
 #else
  uint8_t* alpha_dst = base_rgba + 1;
@@ -598,9 +598,6 @@ static int CustomSetup(VP8Io* io) {
    }
  }

-  if (is_rgb) {
-    VP8YUVInit();
-  }
  return 1;
 }

--- a/src/dec/tree_dec.c
+++ b/src/dec/tree_dec.c
@@ -14,12 +14,16 @@
 #include "./vp8i_dec.h"
 #include "../utils/bit_reader_inl_utils.h"

+#if !defined(USE_GENERIC_TREE)
 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
 // using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
-#define USE_GENERIC_TREE
+#define USE_GENERIC_TREE 1   // ALTERNATE_CODE
+#else
+#define USE_GENERIC_TREE 0
 #endif
+#endif  // USE_GENERIC_TREE

-#ifdef USE_GENERIC_TREE
+#if (USE_GENERIC_TREE == 1)
 static const int8_t kYModesIntra4[18] = {
  -B_DC_PRED, 1,
    -B_TM_PRED, 2,
@@ -317,7 +321,7 @@ static void ParseIntraMode(VP8BitReader* const br,
      int x;
      for (x = 0; x < 4; ++x) {
        const uint8_t* const prob = kBModesProba[top[x]][ymode];
-#ifdef USE_GENERIC_TREE
+#if (USE_GENERIC_TREE == 1)
        // Generic tree-parsing
        int i = kYModesIntra4[VP8GetBit(br, prob[0])];
        while (i > 0) {
--- a/src/dsp/alpha_processing.c
+++ b/src/dsp/alpha_processing.c
@@ -15,7 +15,10 @@
 #include "./dsp.h"

 // Tables can be faster on some platform but incur some extra binary size (~2k).
-// #define USE_TABLES_FOR_ALPHA_MULT
+#if !defined(USE_TABLES_FOR_ALPHA_MULT)
+#define USE_TABLES_FOR_ALPHA_MULT 0   // ALTERNATE_CODE
+#endif
+

 // -----------------------------------------------------------------------------

@@ -29,7 +32,7 @@ static uint32_t Mult(uint8_t x, uint32_t mult) {
  return v;
 }

-#ifdef USE_TABLES_FOR_ALPHA_MULT
+#if (USE_TABLES_FOR_ALPHA_MULT == 1)

 static const uint32_t kMultTables[2][256] = {
  {    // (255u << MFIX) / alpha
@@ -277,7 +280,7 @@ static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,

 static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
                                     int w, int h, int stride) {
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
  ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
 #else
  ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 0);
--- a/src/dsp/dec_clip_tables.c
+++ b/src/dsp/dec_clip_tables.c
@@ -13,9 +13,12 @@

 #include "./dsp.h"

-#define USE_STATIC_TABLES     // undefine to have run-time table initialization
+// define to 0 to have run-time table initialization
+#if !defined(USE_STATIC_TABLES)
+#define USE_STATIC_TABLES 1   // ALTERNATE_CODE
+#endif

-#ifdef USE_STATIC_TABLES
+#if (USE_STATIC_TABLES == 1)

 static const uint8_t abs0[255 + 255 + 1] = {
  0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
@@ -337,7 +340,7 @@ static uint8_t clip1[255 + 511 + 1];
 // and make sure it's set to true _last_ (so as to be thread-safe)
 static volatile int tables_ok = 0;

-#endif
+#endif    // USE_STATIC_TABLES

 const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020];
 const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112];
@@ -345,7 +348,7 @@ const uint8_t* const VP8kclip1 = &clip1[255];
 const uint8_t* const VP8kabs0 = &abs0[255];

 WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
-#if !defined(USE_STATIC_TABLES)
+#if (USE_STATIC_TABLES == 0)
  int i;
  if (!tables_ok) {
    for (i = -255; i <= 255; ++i) {
--- a/src/dsp/dec_sse2.c
+++ b/src/dsp/dec_sse2.c
@@ -18,7 +18,9 @@

 // The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
 // one it seems => disable it by default. Uncomment the following to enable:
-// #define USE_TRANSFORM_AC3
+#if !defined(USE_TRANSFORM_AC3)
+#define USE_TRANSFORM_AC3 0   // ALTERNATE_CODE
+#endif

 #include <emmintrin.h>
 #include "./common_sse2.h"
@@ -193,7 +195,7 @@ static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
  }
 }

-#if defined(USE_TRANSFORM_AC3)
+#if (USE_TRANSFORM_AC3 == 1)
 #define MUL(a, b) (((a) * (b)) >> 16)
 static void TransformAC3(const int16_t* in, uint8_t* dst) {
  static const int kC1 = 20091 + (1 << 16);
@@ -1182,8 +1184,8 @@ extern void VP8DspInitSSE2(void);

 WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
  VP8Transform = Transform;
-#if defined(USE_TRANSFORM_AC3)
-  VP8TransformAC3 = TransformAC3;
+#if (USE_TRANSFORM_AC3 == 1)
+  VP8TransformAC3 = TransformAC3_SSE2;
 #endif

  VP8VFilter16 = VFilter16;
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -134,6 +134,11 @@ extern "C" {
 #endif
 #endif

+// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
+#if !defined(WEBP_SWAP_16BIT_CSP)
+#define WEBP_SWAP_16BIT_CSP 0
+#endif
+
 typedef enum {
  kSSE2,
  kSSE3,
--- a/src/dsp/filters_neon.c
+++ b/src/dsp/filters_neon.c
@@ -251,9 +251,11 @@ static void VerticalUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
 // GradientUnfilter_NEON is correct but slower than the C-version,
 // at least on ARM64. For armv7, it's a wash.
 // So best is to disable it for now, but keep the idea around...
-// #define USE_GRADIENT_UNFILTER
+#if !defined(USE_GRADIENT_UNFILTER)
+#define USE_GRADIENT_UNFILTER 0   // ALTERNATE_CODE
+#endif

-#if defined(USE_GRADIENT_UNFILTER)
+#if (USE_GRADIENT_UNFILTER == 1)
 #define GRAD_PROCESS_LANE(L)  do {                                             \
  const uint8x8_t tmp1 = ROTATE_RIGHT_N(pred, 1);  /* rotate predictor in */   \
  const int16x8_t tmp2 = vaddq_s16(BC, U8_TO_S16(tmp1));                       \
@@ -311,7 +313,7 @@ extern void VP8FiltersInitNEON(void);
 WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitNEON(void) {
  WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_NEON;
  WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_NEON;
-#if defined(USE_GRADIENT_UNFILTER)
+#if (USE_GRADIENT_UNFILTER == 1)
  WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_NEON;
 #endif

--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -452,7 +452,7 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
    const uint32_t argb = *src++;
    const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
    const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    *dst++ = ba;
    *dst++ = rg;
 #else
@@ -469,7 +469,7 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
    const uint32_t argb = *src++;
    const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
    const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    *dst++ = gb;
    *dst++ = rg;
 #else
@@ -496,22 +496,7 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
    const uint32_t* const src_end = src + num_pixels;
    while (src < src_end) {
      const uint32_t argb = *src++;
-
-#if !defined(WORDS_BIGENDIAN)
-#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
      WebPUint32ToMem(dst, BSwap32(argb));
-#else  // WEBP_REFERENCE_IMPLEMENTATION
-      dst[0] = (argb >> 24) & 0xff;
-      dst[1] = (argb >> 16) & 0xff;
-      dst[2] = (argb >>  8) & 0xff;
-      dst[3] = (argb >>  0) & 0xff;
-#endif
-#else  // WORDS_BIGENDIAN
-      dst[0] = (argb >>  0) & 0xff;
-      dst[1] = (argb >>  8) & 0xff;
-      dst[2] = (argb >> 16) & 0xff;
-      dst[3] = (argb >> 24) & 0xff;
-#endif
      dst += sizeof(argb);
    }
  } else {
--- a/src/dsp/lossless_mips_dsp_r2.c
+++ b/src/dsp/lossless_mips_dsp_r2.c
@@ -492,7 +492,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
    "ins            %[temp3],    %[temp5],          16,   4    \n\t"
    "addiu          %[src],      %[src],            16         \n\t"
    "precr.qb.ph    %[temp3],    %[temp3],          %[temp2]   \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    "usw            %[temp1],    0(%[dst])                     \n\t"
    "usw            %[temp3],    4(%[dst])                     \n\t"
 #else
@@ -514,7 +514,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
    "ins            %[temp0],    %[temp5],          16,   4    \n\t"
    "addiu          %[src],      %[src],            4          \n\t"
    "precr.qb.ph    %[temp0],    %[temp0],          %[temp0]   \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    "ush            %[temp0],    0(%[dst])                     \n\t"
 #else
    "wsbh           %[temp0],    %[temp0]                      \n\t"
@@ -570,7 +570,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
    "ins            %[temp2],    %[temp3],          0,    5    \n\t"
    "addiu          %[src],      %[src],            16         \n\t"
    "append         %[temp2],    %[temp1],          16         \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    "usw            %[temp0],    0(%[dst])                     \n\t"
    "usw            %[temp2],    4(%[dst])                     \n\t"
 #else
@@ -592,7 +592,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
    "ins            %[temp4],    %[temp5],          0,    11   \n\t"
    "addiu          %[src],      %[src],            4          \n\t"
    "ins            %[temp4],    %[temp0],          0,    5    \n\t"
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    "ush            %[temp4],    0(%[dst])                     \n\t"
 #else
    "wsbh           %[temp4],    %[temp4]                      \n\t"
--- a/src/dsp/lossless_sse2.c
+++ b/src/dsp/lossless_sse2.c
@@ -570,7 +570,7 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
    const __m128i ga2 = _mm_and_si128(ga1, mask_0x0f);  // g0-|g1-|...|a6-|a7-
    const __m128i rgba0 = _mm_or_si128(ga2, rb1);       // rg0..rg7 | ba0..ba7
    const __m128i rgba1 = _mm_srli_si128(rgba0, 8);     // ba0..ba7 | 0
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    const __m128i rgba = _mm_unpacklo_epi8(rgba1, rgba0);  // barg0...barg7
 #else
    const __m128i rgba = _mm_unpacklo_epi8(rgba0, rgba1);  // rgba0...rgba7
@@ -611,7 +611,7 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
    const __m128i rg1 = _mm_or_si128(rb1, g_lo2);           // gr0...gr7|xx
    const __m128i b1 = _mm_srli_epi16(b0, 3);
    const __m128i gb1 = _mm_or_si128(b1, g_hi2);            // bg0...bg7|xx
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    const __m128i rgba = _mm_unpacklo_epi8(gb1, rg1);     // rggb0...rggb7
 #else
    const __m128i rgba = _mm_unpacklo_epi8(rg1, gb1);     // bgrb0...bgrb7
--- a/src/dsp/upsampling.c
+++ b/src/dsp/upsampling.c
@@ -141,7 +141,6 @@ DUAL_SAMPLE_FUNC(DualLineSamplerARGB, VP8YuvToArgb)

 WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
  WebPInitUpsamplers();
-  VP8YUVInit();
 #ifdef FANCY_UPSAMPLING
  return WebPUpsamplers[alpha_is_last ? MODE_BGRA : MODE_ARGB];
 #else
--- a/src/dsp/upsampling_mips_dsp_r2.c
+++ b/src/dsp/upsampling_mips_dsp_r2.c
@@ -19,8 +19,6 @@
 #include <assert.h>
 #include "./yuv.h"

-#if !defined(WEBP_YUV_USE_TABLE)
-
 #define YUV_TO_RGB(Y, U, V, R, G, B) do {                                      \
    const int t1 = MultHi(Y, 19077);                                           \
    const int t2 = MultHi(V, 13320);                                           \
@@ -68,7 +66,7 @@ static WEBP_INLINE void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) {
  {
    const int rg = (r & 0xf8) | (g >> 5);
    const int gb = ((g << 3) & 0xe0) | (b >> 3);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    rgb[0] = gb;
    rgb[1] = rg;
 #else
@@ -84,7 +82,7 @@ static WEBP_INLINE void YuvToRgba4444(int y, int u, int v,
  {
    const int rg = (r & 0xf0) | (g >> 4);
    const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    argb[0] = ba;
    argb[1] = rg;
 #else
@@ -93,7 +91,6 @@ static WEBP_INLINE void YuvToRgba4444(int y, int u, int v,
 #endif
   }
 }
-#endif  // WEBP_YUV_USE_TABLE

 //-----------------------------------------------------------------------------
 // Alpha handling variants
--- a/src/dsp/upsampling_msa.c
+++ b/src/dsp/upsampling_msa.c
@@ -274,7 +274,7 @@ static void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) {
  const int b = Clip8(b1 >> 6);
  const int rg = (r & 0xf8) | (g >> 5);
  const int gb = ((g << 3) & 0xe0) | (b >> 3);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
  rgb[0] = gb;
  rgb[1] = rg;
 #else
@@ -293,7 +293,7 @@ static void YuvToRgba4444(int y, int u, int v, uint8_t* const argb) {
  const int b = Clip8(b1 >> 6);
  const int rg = (r & 0xf0) | (g >> 4);
  const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
  argb[0] = ba;
  argb[1] = rg;
 #else
@@ -459,7 +459,7 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
                              const uint8_t* v, uint8_t* dst, int length) {
  v16u8 R, G, B, RG, BA, tmp0, tmp1;
  while (length >= 16) {
-  #ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    CALC_RGBA4444(y, u, v, BA, RG, 16, dst);
 #else
    CALC_RGBA4444(y, u, v, RG, BA, 16, dst);
@@ -473,7 +473,7 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
  if (length > 8) {
    uint8_t temp[2 * 16] = { 0 };
    memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    CALC_RGBA4444(temp, u, v, BA, RG, 16, temp);
 #else
    CALC_RGBA4444(temp, u, v, RG, BA, 16, temp);
@@ -482,7 +482,7 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
  } else if (length > 0) {
    uint8_t temp[2 * 8] = { 0 };
    memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    CALC_RGBA4444(temp, u, v, BA, RG, 8, temp);
 #else
    CALC_RGBA4444(temp, u, v, RG, BA, 8, temp);
@@ -495,7 +495,7 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
                            const uint8_t* v, uint8_t* dst, int length) {
  v16u8 R, G, B, RG, GB, tmp0, tmp1;
  while (length >= 16) {
-  #ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    CALC_RGB565(y, u, v, GB, RG, 16, dst);
 #else
    CALC_RGB565(y, u, v, RG, GB, 16, dst);
@@ -509,7 +509,7 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
  if (length > 8) {
    uint8_t temp[2 * 16] = { 0 };
    memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    CALC_RGB565(temp, u, v, GB, RG, 16, temp);
 #else
    CALC_RGB565(temp, u, v, RG, GB, 16, temp);
@@ -518,7 +518,7 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
  } else if (length > 0) {
    uint8_t temp[2 * 8] = { 0 };
    memcpy(temp, y, length * sizeof(*temp));
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
    CALC_RGB565(temp, u, v, GB, RG, 8, temp);
 #else
    CALC_RGB565(temp, u, v, RG, GB, 8, temp);
--- a/src/dsp/yuv.c
+++ b/src/dsp/yuv.c
@@ -15,59 +15,6 @@

 #include <stdlib.h>

-#if defined(WEBP_YUV_USE_TABLE)
-
-static int done = 0;
-
-static WEBP_INLINE uint8_t clip(int v, int max_value) {
-  return v < 0 ? 0 : v > max_value ? max_value : v;
-}
-
-int16_t VP8kVToR[256], VP8kUToB[256];
-int32_t VP8kVToG[256], VP8kUToG[256];
-uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
-uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {
-  int i;
-  if (done) {
-    return;
-  }
-#ifndef USE_YUVj
-  for (i = 0; i < 256; ++i) {
-    VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
-    VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
-    VP8kVToG[i] = -45773 * (i - 128);
-    VP8kUToB[i] = (113618 * (i - 128) + YUV_HALF) >> YUV_FIX;
-  }
-  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
-    const int k = ((i - 16) * 76283 + YUV_HALF) >> YUV_FIX;
-    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
-    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
-  }
-#else
-  for (i = 0; i < 256; ++i) {
-    VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
-    VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
-    VP8kVToG[i] = -46802 * (i - 128);
-    VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
-  }
-  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
-    const int k = i;
-    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
-    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
-  }
-#endif
-
-  done = 1;
-}
-
-#else
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {}
-
-#endif  // WEBP_YUV_USE_TABLE
-
 //-----------------------------------------------------------------------------
 // Plain-C version

--- a/src/dsp/yuv.h
+++ b/src/dsp/yuv.h
@@ -38,16 +38,6 @@
 #include "./dsp.h"
 #include "../dec/vp8_dec.h"

-#if defined(WEBP_EXPERIMENTAL_FEATURES)
-// Do NOT activate this feature for real compression. This is only experimental!
-// This flag is for comparison purpose against JPEG's "YUVj" natural colorspace.
-// This colorspace is close to Rec.601's Y'CbCr model with the notable
-// difference of allowing larger range for luma/chroma.
-// See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its
-// difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
-// #define USE_YUVj
-#endif
-
 //------------------------------------------------------------------------------
 // YUV -> RGB conversion

@@ -111,7 +101,7 @@ static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v,
  const int b = VP8YUVToB(y, u);      // 5 usable bits
  const int rg = (r & 0xf8) | (g >> 5);
  const int gb = ((g << 3) & 0xe0) | (b >> 3);
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
  rgb[0] = gb;
  rgb[1] = rg;
 #else
@@ -127,7 +117,7 @@ static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v,
  const int b = VP8YUVToB(y, u);        // 4 usable bits
  const int rg = (r & 0xf0) | (g >> 4);
  const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
-#ifdef WEBP_SWAP_16BIT_CSP
+#if (WEBP_SWAP_16BIT_CSP == 1)
  argb[0] = ba;
  argb[1] = rg;
 #else
@@ -157,9 +147,6 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
  rgba[3] = 0xff;
 }

-// Must be called before everything, to initialize the tables.
-void VP8YUVInit(void);
-
 //-----------------------------------------------------------------------------
 // SSE2 extra functions (mostly for upsampling_sse2.c)

@@ -192,8 +179,6 @@ static WEBP_INLINE int VP8ClipUV(int uv, int rounding) {
  return ((uv & ~0xff) == 0) ? uv : (uv < 0) ? 0 : 255;
 }

-#ifndef USE_YUVj
-
 static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
  const int luma = 16839 * r + 33059 * g + 6420 * b;
  return (luma + rounding + (16 << YUV_FIX)) >> YUV_FIX;  // no need to clip
@@ -209,28 +194,6 @@ static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
  return VP8ClipUV(v, rounding);
 }

-#else
-
-// This JPEG-YUV colorspace, only for comparison!
-// These are also 16bit precision coefficients from Rec.601, but with full
-// [0..255] output range.
-static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
-  const int luma = 19595 * r + 38470 * g + 7471 * b;
-  return (luma + rounding) >> YUV_FIX;  // no need to clip
-}
-
-static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) {
-  const int u = -11058 * r - 21710 * g + 32768 * b;
-  return VP8ClipUV(u, rounding);
-}
-
-static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
-  const int v = 32768 * r - 27439 * g - 5329 * b;
-  return VP8ClipUV(v, rounding);
-}
-
-#endif    // USE_YUVj
-
 #ifdef __cplusplus
 }    // extern "C"
 #endif
--- a/src/dsp/yuv_sse2.c
+++ b/src/dsp/yuv_sse2.c
@@ -119,7 +119,7 @@ static WEBP_INLINE void PackAndStore4444(const __m128i* const R,
                                         const __m128i* const B,
                                         const __m128i* const A,
                                         uint8_t* const dst) {
-#if !defined(WEBP_SWAP_16BIT_CSP)
+#if (WEBP_SWAP_16BIT_CSP == 0)
  const __m128i rg0 = _mm_packus_epi16(*R, *G);
  const __m128i ba0 = _mm_packus_epi16(*B, *A);
 #else
@@ -149,7 +149,7 @@ static WEBP_INLINE void PackAndStore565(const __m128i* const R,
  const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3);
  const __m128i rg = _mm_or_si128(r1, g1);
  const __m128i gb = _mm_or_si128(g2, b1);
-#if !defined(WEBP_SWAP_16BIT_CSP)
+#if (WEBP_SWAP_16BIT_CSP == 0)
  const __m128i rgb565 = _mm_unpacklo_epi8(rg, gb);
 #else
  const __m128i rgb565 = _mm_unpacklo_epi8(gb, rg);