Fix 12 TAP convolution bug

Previously, we did 12-tap interpolation even when there was no sub-pixel offset. This could cause a bug because the decoder doesn't extend the border when there is no sub-pixel offset. In this situation, if we still do interpolation, we will access a border extension which doesn't exist and cause a memory error. Change-Id: I55b879722f0a10c5d13261bd9617a75c826a2418
2016-02-19 19:31:38 -08:00
parent 961668c91c
commit 1e403064b9
2 changed files with 204 additions and 112 deletions
--- a/test/vp10_convolve_test.cc
+++ b/test/vp10_convolve_test.cc
@@ -60,38 +60,42 @@ TEST(VP10ConvolveTest, vp10_convolve) {
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
-  int subpel_x_q4 = 3;
-  int subpel_y_q4 = 2;
  int avg = 0;
-
  int w = 1;
  int h = 1;

+  int subpel_x_q4;
+  int subpel_y_q4;
+
  for (int i = 0; i < filter_size * filter_size; i++) {
    src[i] = rnd.Rand16() % (1 << 8);
  }

-  vp10_convolve(src + src_stride * filter_center + filter_center, src_stride,
-                dst, dst_stride, w, h, filter_params, subpel_x_q4, x_step_q4,
-                subpel_y_q4, y_step_q4, avg);
+  for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
+    for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
+      vp10_convolve(src + src_stride * filter_center + filter_center,
+                    src_stride, dst, dst_stride, w, h, filter_params,
+                    subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);

-  const int16_t* x_filter =
-      vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
-  const int16_t* y_filter =
-      vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+      const int16_t* x_filter =
+          vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+      const int16_t* y_filter =
+          vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);

-  int temp[12];
-  int dst_ref = 0;
-  for (int r = 0; r < filter_size; r++) {
-    temp[r] = 0;
-    for (int c = 0; c < filter_size; c++) {
-      temp[r] += x_filter[c] * src[r * filter_size + c];
+      int temp[12];
+      int dst_ref = 0;
+      for (int r = 0; r < filter_size; r++) {
+        temp[r] = 0;
+        for (int c = 0; c < filter_size; c++) {
+          temp[r] += x_filter[c] * src[r * filter_size + c];
+        }
+        temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
+        dst_ref += temp[r] * y_filter[r];
+      }
+      dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
+      EXPECT_EQ(dst[0], dst_ref);
    }
-    temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
-    dst_ref += temp[r] * y_filter[r];
  }
-  dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
-  EXPECT_EQ(dst[0], dst_ref);
 }

 TEST(VP10ConvolveTest, vp10_convolve_avg) {
@@ -110,13 +114,14 @@ TEST(VP10ConvolveTest, vp10_convolve_avg) {
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
-  int subpel_x_q4 = 3;
-  int subpel_y_q4 = 2;
  int avg = 0;

  int w = 1;
  int h = 1;

+  int subpel_x_q4;
+  int subpel_y_q4;
+
  for (int i = 0; i < filter_size * filter_size; i++) {
    src0[i] = rnd.Rand16() % (1 << 8);
    src1[i] = rnd.Rand16() % (1 << 8);
@@ -124,23 +129,29 @@ TEST(VP10ConvolveTest, vp10_convolve_avg) {

  int offset = filter_size * filter_center + filter_center;

-  avg = 0;
-  vp10_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
-                filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
-                avg);
-  avg = 0;
-  vp10_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
-                filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
-                avg);
+  for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
+    for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
+      avg = 0;
+      vp10_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
+                    filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                    y_step_q4, avg);
+      avg = 0;
+      vp10_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
+                    filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                    y_step_q4, avg);

-  avg = 0;
-  vp10_convolve(src0 + offset, src_stride, dst, dst_stride, w, h, filter_params,
-                subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
-  avg = 1;
-  vp10_convolve(src1 + offset, src_stride, dst, dst_stride, w, h, filter_params,
-                subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
+      avg = 0;
+      vp10_convolve(src0 + offset, src_stride, dst, dst_stride, w, h,
+                    filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                    y_step_q4, avg);
+      avg = 1;
+      vp10_convolve(src1 + offset, src_stride, dst, dst_stride, w, h,
+                    filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                    y_step_q4, avg);

-  EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+      EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+    }
+  }
 }

 #if CONFIG_VP9_HIGHBITDEPTH
@@ -157,40 +168,45 @@ TEST(VP10ConvolveTest, vp10_highbd_convolve) {
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
-  int subpel_x_q4 = 8;
-  int subpel_y_q4 = 6;
  int avg = 0;
  int bd = 10;
-
  int w = 1;
  int h = 1;

+  int subpel_x_q4;
+  int subpel_y_q4;
+
  for (int i = 0; i < filter_size * filter_size; i++) {
    src[i] = rnd.Rand16() % (1 << bd);
  }

-  vp10_highbd_convolve(
-      CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
-      src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
-      subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
+  for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
+    for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
+      vp10_highbd_convolve(
+          CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
+          src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
+          subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);

-  const int16_t* x_filter =
-      vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
-  const int16_t* y_filter =
-      vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+      const int16_t* x_filter =
+          vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+      const int16_t* y_filter =
+          vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);

-  int temp[12];
-  int dst_ref = 0;
-  for (int r = 0; r < filter_size; r++) {
-    temp[r] = 0;
-    for (int c = 0; c < filter_size; c++) {
-      temp[r] += x_filter[c] * src[r * filter_size + c];
+      int temp[12];
+      int dst_ref = 0;
+      for (int r = 0; r < filter_size; r++) {
+        temp[r] = 0;
+        for (int c = 0; c < filter_size; c++) {
+          temp[r] += x_filter[c] * src[r * filter_size + c];
+        }
+        temp[r] =
+            clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
+        dst_ref += temp[r] * y_filter[r];
+      }
+      dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
+      EXPECT_EQ(dst[0], dst_ref);
    }
-    temp[r] = clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
-    dst_ref += temp[r] * y_filter[r];
  }
-  dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
-  EXPECT_EQ(dst[0], dst_ref);
 }

 TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) {
@@ -209,42 +225,49 @@ TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) {
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
-  int subpel_x_q4 = 3;
-  int subpel_y_q4 = 2;
  int avg = 0;
  int bd = 10;

  int w = 1;
  int h = 1;

+  int subpel_x_q4;
+  int subpel_y_q4;
+
  for (int i = 0; i < filter_size * filter_size; i++) {
    src0[i] = rnd.Rand16() % (1 << bd);
    src1[i] = rnd.Rand16() % (1 << bd);
  }

-  int offset = filter_size * filter_center + filter_center;
+  for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
+    for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
+      int offset = filter_size * filter_center + filter_center;

-  avg = 0;
-  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
-                       CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
-                       filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
-                       y_step_q4, avg, bd);
-  avg = 0;
-  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
-                       CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
-                       filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
-                       y_step_q4, avg, bd);
+      avg = 0;
+      vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
+                           CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
+                           filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                           y_step_q4, avg, bd);
+      avg = 0;
+      vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
+                           CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
+                           filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                           y_step_q4, avg, bd);

-  avg = 0;
-  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
-                       CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
-                       subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
-  avg = 1;
-  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
-                       CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
-                       subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
+      avg = 0;
+      vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
+                           CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
+                           filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                           y_step_q4, avg, bd);
+      avg = 1;
+      vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
+                           CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
+                           filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                           y_step_q4, avg, bd);

-  EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+      EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+    }
+  }
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
--- a/vp10/common/vp10_convolve.c
+++ b/vp10/common/vp10_convolve.c
@@ -1,4 +1,5 @@
 #include <assert.h>
+#include <string.h>

 #include "vp10/common/filter.h"
 #include "vpx_dsp/vpx_dsp_common.h"
@@ -69,22 +70,35 @@ static void convolve_vert(const uint8_t *src, int src_stride, uint8_t *dst,
  }
 }

+static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
+                          int dst_stride, int w, int h, int avg) {
+  if (avg == 0) {
+    int r;
+    for (r = 0; r < h; ++r) {
+      memcpy(dst, src, w);
+      src += src_stride;
+      dst += dst_stride;
+    }
+  } else {
+    int r, c;
+    for (r = 0; r < h; ++r) {
+      for (c = 0; c < w; ++c) {
+        dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1));
+      }
+      src += src_stride;
+      dst += dst_stride;
+    }
+  }
+}
+
 void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
                   int dst_stride, int w, int h,
                   const InterpFilterParams filter_params,
                   const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
                   int y_step_q4, int avg) {
  int filter_size = filter_params.tap;
-
-  // temp's size is set to (maximum possible intermediate_height) *
-  // MAX_BLOCK_WIDTH
-  uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
-                MAX_FILTER_TAP) *
-               MAX_BLOCK_WIDTH];
-  int temp_stride = MAX_BLOCK_WIDTH;
-
-  int intermediate_height =
-      (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
+  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
@@ -92,11 +106,31 @@ void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
  assert(x_step_q4 <= MAX_STEP);
  assert(filter_params.tap <= MAX_FILTER_TAP);

-  convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
-                 temp_stride, w, intermediate_height, filter_params,
-                 subpel_x_q4, x_step_q4, 0);
-  convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
-                dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, avg);
+  if (ignore_horiz && ignore_vert) {
+    convolve_copy(src, src_stride, dst, dst_stride, w, h, avg);
+  } else if (ignore_vert) {
+    convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
+                   subpel_x_q4, x_step_q4, avg);
+  } else if (ignore_horiz) {
+    convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
+                  subpel_y_q4, y_step_q4, avg);
+  } else {
+    // temp's size is set to (maximum possible intermediate_height) *
+    // MAX_BLOCK_WIDTH
+    uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+                  MAX_FILTER_TAP) *
+                 MAX_BLOCK_WIDTH];
+    int temp_stride = MAX_BLOCK_WIDTH;
+
+    int intermediate_height =
+        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+
+    convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
+                   temp_stride, w, intermediate_height, filter_params,
+                   subpel_x_q4, x_step_q4, 0);
+    convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
+                  dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, avg);
+  }
 }

 #if CONFIG_VP9_HIGHBITDEPTH
@@ -164,23 +198,39 @@ static void highbd_convolve_vert(const uint16_t *src, int src_stride,
  }
 }

+static void highbd_convolve_copy(const uint16_t *src, int src_stride,
+                                 uint16_t *dst, int dst_stride, int w, int h,
+                                 int avg, int bd) {
+  if (avg == 0) {
+    int r;
+    for (r = 0; r < h; ++r) {
+      memcpy(dst, src, w * sizeof(*src));
+      src += src_stride;
+      dst += dst_stride;
+    }
+  } else {
+    int r, c;
+    for (r = 0; r < h; ++r) {
+      for (c = 0; c < w; ++c) {
+        dst[c] = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst[c] + src[c], 1), bd);
+      }
+      src += src_stride;
+      dst += dst_stride;
+    }
+  }
+}
+
 void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
                          int dst_stride, int w, int h,
                          const InterpFilterParams filter_params,
                          const int subpel_x_q4, int x_step_q4,
                          const int subpel_y_q4, int y_step_q4, int avg,
                          int bd) {
+  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  int filter_size = filter_params.tap;
-
-  // temp's size is set to (maximum possible intermediate_height) *
-  // MAX_BLOCK_WIDTH
-  uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
-                 MAX_FILTER_TAP) *
-                MAX_BLOCK_WIDTH];
-  int temp_stride = MAX_BLOCK_WIDTH;
-
-  int intermediate_height =
-      (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
+  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
@@ -188,12 +238,31 @@ void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
  assert(x_step_q4 <= MAX_STEP);
  assert(filter_params.tap <= MAX_FILTER_TAP);

-  highbd_convolve_horiz(
-      CONVERT_TO_SHORTPTR(src8 - src_stride * (filter_size / 2 - 1)),
-      src_stride, temp, temp_stride, w, intermediate_height, filter_params,
-      subpel_x_q4, x_step_q4, 0, bd);
-  highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
-                       CONVERT_TO_SHORTPTR(dst8), dst_stride, w, h,
-                       filter_params, subpel_y_q4, y_step_q4, avg, bd);
+  if (ignore_horiz && ignore_vert) {
+    highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, avg, bd);
+  } else if (ignore_vert) {
+    highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
+                          subpel_x_q4, x_step_q4, avg, bd);
+  } else if (ignore_horiz) {
+    highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
+                         subpel_y_q4, y_step_q4, avg, bd);
+  } else {
+    // temp's size is set to (maximum possible intermediate_height) *
+    // MAX_BLOCK_WIDTH
+    uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+                   MAX_FILTER_TAP) *
+                  MAX_BLOCK_WIDTH];
+    int temp_stride = MAX_BLOCK_WIDTH;
+
+    int intermediate_height =
+        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+
+    highbd_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
+                          temp, temp_stride, w, intermediate_height,
+                          filter_params, subpel_x_q4, x_step_q4, 0, bd);
+    highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
+                         temp_stride, dst, dst_stride, w, h, filter_params,
+                         subpel_y_q4, y_step_q4, avg, bd);
+  }
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH