From 64c844863ab0d13e8dd51a59001ab55fbe64065e Mon Sep 17 00:00:00 2001
From: Urvang Joshi <urvang@google.com>
Date: Mon, 13 May 2013 16:24:49 -0700
Subject: [PATCH] Further reduce memory to decode lossy+alpha images

Previously, decoding such images used roughly 9 * width * height bytes.
Now it takes 6 * width * height bytes.

Change-Id: Ie4a681ca5074d96d64f30b2597fafdca648dd8f7
---
 src/dec/vp8l.c     | 322 ++++++++++++++++++++++++++-------------------
 src/dec/vp8li.h    |   3 +-
 src/dsp/lossless.c |  87 +++++++-----
 src/dsp/lossless.h |   7 +
 4 files changed, 253 insertions(+), 166 deletions(-)

diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c
index 1665fe17..19858822 100644
--- a/src/dec/vp8l.c
+++ b/src/dec/vp8l.c
@@ -625,10 +625,24 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
   }
 }
 
+// Inverse color-indexing for 'num_rows' rows of 8-bit paletted alpha data.
+static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows,
+                                        const uint8_t* const rows) {
+  VP8LTransform* const transform = &dec->transforms_[0];
+  const int row_begin = dec->last_row_;
+  const int row_end = row_begin + num_rows;
+  // Results go directly to the io_->opaque plane, starting at 'row_begin'.
+  uint8_t* const dst = (uint8_t*)dec->io_->opaque + dec->io_->width * row_begin;
+  assert(dec->next_transform_ == 1);
+  assert(transform->type_ == COLOR_INDEXING_TRANSFORM);
+  VP8LColorIndexInverseTransformAlpha(transform, row_begin, row_end, rows,
+                                      dst);
+}
+
 // Processes (transforms, scales & color-converts) the rows decoded after the
 // last call.
 static void ProcessRows(VP8LDecoder* const dec, int row) {
-  const uint32_t* const rows = dec->argb_ + dec->width_ * dec->last_row_;
+  const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_;
   const int num_rows = row - dec->last_row_;
 
   if (num_rows <= 0) return;  // Nothing to be done.
@@ -667,121 +681,135 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
   assert(dec->last_row_ <= dec->height_);
 }
 
-static int DecodeImageData(VP8LDecoder* const dec,
-                           uint32_t* const data, int width, int height,
-                           ProcessRowsFunc process_func) {
-  int ok = 1;
-  int col = 0, row = 0;
-  VP8LBitReader* const br = &dec->br_;
-  VP8LMetadata* const hdr = &dec->hdr_;
-  HTreeGroup* htree_group = hdr->htree_groups_;
-  uint32_t* src = data;
-  uint32_t* last_cached = data;
-  uint32_t* const src_end = data + width * height;
-  const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
-  const int color_cache_limit = len_code_limit + hdr->color_cache_size_;
-  VP8LColorCache* const color_cache =
-      (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;
-  const int mask = hdr->huffman_mask_;
-
-  assert(htree_group != NULL);
-
-  while (!br->eos_ && src < src_end) {
-    int code;
-    // Only update when changing tile. Note we could use the following test:
-    //   if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed
-    // but that's actually slower and requires storing the previous col/row
-    if ((col & mask) == 0) {
-      htree_group = GetHtreeGroupForPos(hdr, col, row);
-    }
-    VP8LFillBitWindow(br);
-    code = ReadSymbol(&htree_group->htrees_[GREEN], br);
-    if (code < NUM_LITERAL_CODES) {   // Literal.
-      int red, green, blue, alpha;
-      red = ReadSymbol(&htree_group->htrees_[RED], br);
-      green = code;
-      VP8LFillBitWindow(br);
-      blue = ReadSymbol(&htree_group->htrees_[BLUE], br);
-      alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br);
-      *src = (alpha << 24) + (red << 16) + (green << 8) + blue;
- AdvanceByOne:
-      ++src;
-      ++col;
-      if (col >= width) {
-        col = 0;
-        ++row;
-        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
-          process_func(dec, row);
-        }
-        if (color_cache != NULL) {
-          while (last_cached < src) {
-            VP8LColorCacheInsert(color_cache, *last_cached++);
-          }
-        }
-      }
-    } else if (code < len_code_limit) {           // Backward reference
-      int dist_code, dist;
-      const int length_sym = code - NUM_LITERAL_CODES;
-      const int length = GetCopyLength(length_sym, br);
-      const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br);
-      VP8LFillBitWindow(br);
-      dist_code = GetCopyDistance(dist_symbol, br);
-      dist = PlaneCodeToDistance(width, dist_code);
-      if (src - data < dist || src_end - src < length) {
-        ok = 0;
-        goto End;
-      }
-      {
-        int i;
-        for (i = 0; i < length; ++i) src[i] = src[i - dist];
-        src += length;
-      }
-      col += length;
-      while (col >= width) {
-        col -= width;
-        ++row;
-        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
-          process_func(dec, row);
-        }
-      }
-      if (src < src_end) {
-        htree_group = GetHtreeGroupForPos(hdr, col, row);
-        if (color_cache != NULL) {
-          while (last_cached < src) {
-            VP8LColorCacheInsert(color_cache, *last_cached++);
-          }
-        }
-      }
-    } else if (code < color_cache_limit) {    // Color cache.
-      const int key = code - len_code_limit;
-      assert(color_cache != NULL);
-      while (last_cached < src) {
-        VP8LColorCacheInsert(color_cache, *last_cached++);
-      }
-      *src = VP8LColorCacheLookup(color_cache, key);
-      goto AdvanceByOne;
-    } else {    // Not reached.
-      ok = 0;
-      goto End;
-    }
-    ok = !br->error_;
-    if (!ok) goto End;
-  }
-  // Process the remaining rows corresponding to last row-block.
-  if (process_func != NULL) process_func(dec, row);
-
- End:
-  if (br->error_ || !ok || (br->eos_ && src < src_end)) {
-    ok = 0;
-    dec->status_ = (!br->eos_) ?
-        VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED;
-  } else if (src == src_end) {
-    dec->state_ = READ_DATA;
-  }
-
-  return ok;
+#define DECODE_DATA_FUNC(FUNC_NAME, TYPE, STORE_PIXEL)                         \
+static int FUNC_NAME(VP8LDecoder* const dec, TYPE* const data, int width,      \
+                     int height, ProcessRowsFunc process_func) {               \
+  int ok = 1;                                                                  \
+  int col = 0, row = 0;                                                        \
+  VP8LBitReader* const br = &dec->br_;                                         \
+  VP8LMetadata* const hdr = &dec->hdr_;                                        \
+  HTreeGroup* htree_group = hdr->htree_groups_;                                \
+  TYPE* src = data;                                                            \
+  TYPE* last_cached = data;                                                    \
+  TYPE* const src_end = data + width * height;                                 \
+  const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;             \
+  const int color_cache_limit = len_code_limit + hdr->color_cache_size_;       \
+  VP8LColorCache* const color_cache =                                          \
+      (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;                \
+  const int mask = hdr->huffman_mask_;                                         \
+  assert(htree_group != NULL);                                                 \
+  while (!br->eos_ && src < src_end) {                                         \
+    int code;                                                                  \
+    /* Only update when changing tile. Note we could use this test:        */  \
+    /* if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed */  \
+    /* but that's actually slower and needs storing the previous col/row.  */  \
+    if ((col & mask) == 0) {                                                   \
+      htree_group = GetHtreeGroupForPos(hdr, col, row);                        \
+    }                                                                          \
+    VP8LFillBitWindow(br);                                                     \
+    code = ReadSymbol(&htree_group->htrees_[GREEN], br);                       \
+    if (code < NUM_LITERAL_CODES) {  /* Literal */                             \
+      int red, green, blue, alpha;                                             \
+      red = ReadSymbol(&htree_group->htrees_[RED], br);                        \
+      green = code;                                                            \
+      VP8LFillBitWindow(br);                                                   \
+      blue = ReadSymbol(&htree_group->htrees_[BLUE], br);                      \
+      alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br);                    \
+      *src = STORE_PIXEL(alpha, red, green, blue);                             \
+    AdvanceByOne:                                                              \
+      ++src;                                                                   \
+      ++col;                                                                   \
+      if (col >= width) {                                                      \
+        col = 0;                                                               \
+        ++row;                                                                 \
+        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {      \
+          process_func(dec, row);                                              \
+        }                                                                      \
+        if (color_cache != NULL) {                                             \
+          while (last_cached < src) {                                          \
+            VP8LColorCacheInsert(color_cache, *last_cached++);                 \
+          }                                                                    \
+        }                                                                      \
+      }                                                                        \
+    } else if (code < len_code_limit) {  /* Backward reference */              \
+      int dist_code, dist;                                                     \
+      const int length_sym = code - NUM_LITERAL_CODES;                         \
+      const int length = GetCopyLength(length_sym, br);                        \
+      const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br);     \
+      VP8LFillBitWindow(br);                                                   \
+      dist_code = GetCopyDistance(dist_symbol, br);                            \
+      dist = PlaneCodeToDistance(width, dist_code);                            \
+      if (src - data < dist || src_end - src < length) {  /* Out of bounds */  \
+        ok = 0;                                                                \
+        goto End;                                                              \
+      }                                                                        \
+      {                                                                        \
+        int i;                                                                 \
+        for (i = 0; i < length; ++i) src[i] = src[i - dist];                   \
+        src += length;                                                         \
+      }                                                                        \
+      col += length;                                                           \
+      while (col >= width) {                                                   \
+        col -= width;                                                          \
+        ++row;                                                                 \
+        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {      \
+          process_func(dec, row);                                              \
+        }                                                                      \
+      }                                                                        \
+      if (src < src_end) {                                                     \
+        htree_group = GetHtreeGroupForPos(hdr, col, row);                      \
+        if (color_cache != NULL) {                                             \
+          while (last_cached < src) {                                          \
+            VP8LColorCacheInsert(color_cache, *last_cached++);                 \
+          }                                                                    \
+        }                                                                      \
+      }                                                                        \
+    } else if (code < color_cache_limit) {  /* Color cache */                  \
+      const int key = code - len_code_limit;                                   \
+      assert(color_cache != NULL);                                             \
+      while (last_cached < src) {                                              \
+        VP8LColorCacheInsert(color_cache, *last_cached++);                     \
+      }                                                                        \
+      *src = VP8LColorCacheLookup(color_cache, key);                           \
+      goto AdvanceByOne;                                                       \
+    } else {  /* Not reached */                                                \
+      ok = 0;                                                                  \
+      goto End;                                                                \
+    }                                                                          \
+    ok = !br->error_;                                                          \
+    if (!ok) goto End;                                                         \
+  }                                                                            \
+  /* Process the remaining rows corresponding to last row-block. */            \
+  if (process_func != NULL) process_func(dec, row);                            \
+End:                                                                           \
+  if (br->error_ || !ok || (br->eos_ && src < src_end)) {  /* Incomplete */    \
+    ok = 0;                                                                    \
+    dec->status_ =                                                             \
+        (!br->eos_) ? VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED;       \
+  } else if (src == src_end) {                                                 \
+    dec->state_ = READ_DATA;                                                   \
+  }                                                                            \
+  return ok;                                                                   \
 }
 
+// Packs 0xAARRGGBB; alpha is shifted as unsigned (255 << 24 overflows int).
+static WEBP_INLINE uint32_t GetARGBPixel(int alpha, int red, int green,
+                                         int blue) {
+  return ((uint32_t)alpha << 24) | (red << 16) | (green << 8) | blue;
+}
+
+// 8-bit variant: only green is kept, since it carries the alpha value.
+static WEBP_INLINE uint8_t GetAlphaPixel(int alpha, int red, int green,
+                                         int blue) {
+  (void)alpha, (void)red, (void)blue;  // Unused in the 8-bit case.
+  return green;
+}
+
+DECODE_DATA_FUNC(DecodeImageData, uint32_t, GetARGBPixel)  // 4 bytes/pixel.
+DECODE_DATA_FUNC(DecodeAlphaData, uint8_t, GetAlphaPixel)  // 1 byte/pixel.
+
+#undef DECODE_DATA_FUNC
+
 // -----------------------------------------------------------------------------
 // VP8LTransform
 
@@ -903,8 +931,8 @@ void VP8LClear(VP8LDecoder* const dec) {
   if (dec == NULL) return;
   ClearMetadata(&dec->hdr_);
 
-  free(dec->argb_);
-  dec->argb_ = NULL;
+  free(dec->pixels_);
+  dec->pixels_ = NULL;
   for (i = 0; i < dec->next_transform_; ++i) {
     ClearTransform(&dec->transforms_[i]);
   }
@@ -1028,35 +1056,38 @@ static int DecodeImageStream(int xsize, int ysize,
 }
 
 //------------------------------------------------------------------------------
-// Allocate dec->argb_ and dec->argb_cache_ using dec->width_ and dec->height_
-
-static int AllocateARGBBuffers(VP8LDecoder* const dec, int final_width) {
+// Allocates dec->pixels_ (plus dec->argb_cache_ for 4-byte pixels); 0 on OOM.
+static int AllocateInternalBuffers(VP8LDecoder* const dec, int final_width,
+                                   size_t bytes_per_pixel) {
+  const int argb_cache_needed = (bytes_per_pixel == sizeof(uint32_t));
   const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_;
   // Scratch buffer corresponding to top-prediction row for transforming the
-  // first row in the row-blocks.
-  const uint64_t cache_top_pixels = final_width;
-  // Scratch buffer for temporary BGRA storage.
-  const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS;
+  // first row in the row-blocks. Not needed for paletted alpha.
+  const uint64_t cache_top_pixels = argb_cache_needed ? final_width : 0ULL;
+  // Scratch buffer for temporary BGRA storage. Not needed for paletted alpha.
+  const uint64_t cache_pixels =
+      argb_cache_needed ? (uint64_t)final_width * NUM_ARGB_CACHE_ROWS : 0ULL;
   const uint64_t total_num_pixels =
       num_pixels + cache_top_pixels + cache_pixels;
 
   assert(dec->width_ <= final_width);
-  dec->argb_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(*dec->argb_));
-  if (dec->argb_ == NULL) {
+  dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, bytes_per_pixel);
+  if (dec->pixels_ == NULL) {
     dec->argb_cache_ = NULL;    // for sanity check
     dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
     return 0;
   }
-  dec->argb_cache_ = dec->argb_ + num_pixels + cache_top_pixels;
+  dec->argb_cache_ =
+      argb_cache_needed ? dec->pixels_ + num_pixels + cache_top_pixels : NULL;
   return 1;
 }
 
 //------------------------------------------------------------------------------
-// Special row-processing that only stores the alpha data.
 
+// Special row-processing that only stores the alpha data.
 static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
   const int num_rows = row - dec->last_row_;
-  const uint32_t* const in = dec->argb_ + dec->width_ * dec->last_row_;
+  const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_;
 
   if (num_rows <= 0) return;  // Nothing to be done.
   ApplyInverseTransforms(dec, num_rows, in);
@@ -1070,7 +1101,17 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
     int i;
     for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff;
   }
+  dec->last_row_ = dec->last_out_row_ = row;
+}
 
+// Row callback for 8-bit paletted alpha: handles the rows decoded since the
+// previous call, i.e. [dec->last_row_, row), then advances the row counters.
+static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {
+  const int first_row = dec->last_row_;
+  const uint8_t* const src = (uint8_t*)dec->pixels_ + dec->width_ * first_row;
+  const int count = row - first_row;
+  if (count <= 0) return;  // No new rows.
+  ApplyInverseTransformsAlpha(dec, count, src);
   dec->last_row_ = dec->last_out_row_ = row;
 }
 
@@ -1079,6 +1120,7 @@ int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
   VP8Io io;
   int ok = 0;
   VP8LDecoder* const dec = VP8LNew();
+  size_t bytes_per_pixel = sizeof(uint32_t);  // Default: BGRA mode.
   if (dec == NULL) return 0;
 
   dec->width_ = width;
@@ -1097,13 +1139,24 @@ int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
   dec->action_ = READ_HDR;
   if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Err;
 
-  // Allocate output (note that dec->width_ may have changed here).
-  if (!AllocateARGBBuffers(dec, width)) goto Err;
+  // Special case: alpha data with only the color indexing transform and no
+  // color cache (a frequent case) is decoded by DecodeAlphaData() at 1 byte
+  // per pixel. Color-cache entries are full ARGB, so they need 4 bytes/pixel.
+  if (dec->next_transform_ == 1 && dec->hdr_.color_cache_size_ == 0 &&
+      dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM) {
+    bytes_per_pixel = sizeof(uint8_t);
+  }
+
+  // Allocate internal buffers (note that dec->width_ may have changed here).
+  if (!AllocateInternalBuffers(dec, width, bytes_per_pixel)) goto Err;
 
   // Decode (with special row processing).
   dec->action_ = READ_DATA;
-  ok = DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
-                       ExtractAlphaRows);
+  ok = (bytes_per_pixel == sizeof(uint8_t)) ?
+      DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_,
+                      ExtractPalettedAlphaRows) :
+      DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
+                      ExtractAlphaRows);
 
  Err:
   VP8LDelete(dec);
@@ -1143,6 +1196,7 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
 }
 
 int VP8LDecodeImage(VP8LDecoder* const dec) {
+  const size_t bytes_per_pixel = sizeof(uint32_t);
   VP8Io* io = NULL;
   WebPDecParams* params = NULL;
 
@@ -1162,13 +1216,13 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
     goto Err;
   }
 
-  if (!AllocateARGBBuffers(dec, io->width)) goto Err;
+  if (!AllocateInternalBuffers(dec, io->width, bytes_per_pixel)) goto Err;
 
   if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
 
   // Decode.
   dec->action_ = READ_DATA;
-  if (!DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
+  if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
                        ProcessRows)) {
     goto Err;
   }
diff --git a/src/dec/vp8li.h b/src/dec/vp8li.h
index ee29eb5f..8b240585 100644
--- a/src/dec/vp8li.h
+++ b/src/dec/vp8li.h
@@ -63,7 +63,8 @@ typedef struct {
 
   const WebPDecBuffer *output_;    // shortcut to io->opaque->output
 
-  uint32_t        *argb_;          // Internal data: always in BGRA color mode.
+  uint32_t        *pixels_;        // Internal data: either uint8_t* for alpha
+                                   // or uint32_t* for BGRA.
   uint32_t        *argb_cache_;    // Scratch buffer for temporary BGRA storage.
 
   VP8LBitReader    br_;
diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
index c015b7ad..1060cbd3 100644
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -1093,39 +1093,64 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
 }
 
 // Separate out pixels packed together using pixel-bundling.
-static void ColorIndexInverseTransform(
-    const VP8LTransform* const transform,
-    int y_start, int y_end, const uint32_t* src, uint32_t* dst) {
-  int y;
-  const int bits_per_pixel = 8 >> transform->bits_;
-  const int width = transform->xsize_;
-  const uint32_t* const color_map = transform->data_;
-  if (bits_per_pixel < 8) {
-    const int pixels_per_byte = 1 << transform->bits_;
-    const int count_mask = pixels_per_byte - 1;
-    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;
-    for (y = y_start; y < y_end; ++y) {
-      uint32_t packed_pixels = 0;
-      int x;
-      for (x = 0; x < width; ++x) {
-        // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
-        // increments of x. Fortunately, pixels_per_byte is a power of 2, so
-        // can just use a mask for that, instead of decrementing a counter.
-        if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
-        *dst++ = color_map[packed_pixels & bit_mask];
-        packed_pixels >>= bits_per_pixel;
-      }
-    }
-  } else {
-    for (y = y_start; y < y_end; ++y) {
-      int x;
-      for (x = 0; x < width; ++x) {
-        *dst++ = color_map[((*src++) >> 8) & 0xff];
-      }
-    }
-  }
+// Two variants are generated: ARGB data (uint32_t) and alpha-only (uint8_t).
+#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)             \
+void FUNC_NAME(const VP8LTransform* const transform,                           \
+               int y_start, int y_end, const TYPE* src, TYPE* dst) {           \
+  int y;                                                                       \
+  const int bits_per_pixel = 8 >> transform->bits_;                            \
+  const int width = transform->xsize_;                                         \
+  const uint32_t* const color_map = transform->data_;                          \
+  if (bits_per_pixel < 8) {  /* Several pixels per byte. */                    \
+    const int pixels_per_byte = 1 << transform->bits_;                         \
+    const int count_mask = pixels_per_byte - 1;                                \
+    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
+    for (y = y_start; y < y_end; ++y) {                                        \
+      uint32_t packed_pixels = 0;                                              \
+      int x;                                                                   \
+      for (x = 0; x < width; ++x) {                                            \
+        /* We need to load fresh 'packed_pixels' once every                */  \
+        /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
+        /* is a power of 2, so can just use a mask for that, instead of    */  \
+        /* decrementing a counter.                                         */  \
+        if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
+        *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
+        packed_pixels >>= bits_per_pixel;                                      \
+      }                                                                        \
+    }                                                                          \
+  } else {  /* One pixel per source value. */                                  \
+    for (y = y_start; y < y_end; ++y) {                                        \
+      int x;                                                                   \
+      for (x = 0; x < width; ++x) {                                            \
+        *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                      \
+      }                                                                        \
+    }                                                                          \
+  }                                                                            \
 }
 
+static WEBP_INLINE uint32_t GetARGBIndex(uint32_t index) {
+  return (index >> 8) & 0xff;  // The index is carried in bits 8..15 (green).
+}
+
+static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t index) {
+  return index;  // 8-bit samples are used as the index unchanged.
+}
+
+static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
+  return val;  // Color-map entries are output unchanged.
+}
+
+static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
+  return (val >> 8) & 0xff;  // Keep only bits 8..15 (green) of the entry.
+}
+
+static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
+                           GetARGBValue)  // File-local, uint32_t pixels.
+COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
+                    GetAlphaValue)  // Non-static, uint8_t samples.
+
+#undef COLOR_INDEX_INVERSE
+
 void VP8LInverseTransform(const VP8LTransform* const transform,
                           int row_start, int row_end,
                           const uint32_t* const in, uint32_t* const out) {
diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h
index 6742bcc8..3fd224e6 100644
--- a/src/dsp/lossless.h
+++ b/src/dsp/lossless.h
@@ -33,6 +33,13 @@ void VP8LInverseTransform(const struct VP8LTransform* const transform,
                           int row_start, int row_end,
                           const uint32_t* const in, uint32_t* const out);
 
+// Similar to the static method ColorIndexInverseTransform() that is part of
+// lossless.c, but used only for alpha decoding. It takes uint8_t (rather than
+// uint32_t) arguments for 'src' and 'dst'.
+void VP8LColorIndexInverseTransformAlpha(
+    const struct VP8LTransform* const transform, int y_start, int y_end,
+    const uint8_t* src, uint8_t* dst);
+
 // Subtracts green from blue and red channels.
 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs);