diff --git a/test/convolve_test.cc b/test/convolve_test.cc
new file mode 100644
index 000000000..354384063
--- /dev/null
+++ b/test/convolve_test.cc
@@ -0,0 +1,490 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+extern "C" {
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+}
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
+                              uint8_t *dst, int dst_stride,
+                              const int16_t *filter_x, int x_step_q4,
+                              const int16_t *filter_y, int y_step_q4,
+                              int w, int h);
+
+struct ConvolveFunctions {  // Bundle of the six functions under test.
+  ConvolveFunctions(convolve_fn_t h8, convolve_fn_t h8_avg,
+                    convolve_fn_t v8, convolve_fn_t v8_avg,
+                    convolve_fn_t hv8, convolve_fn_t hv8_avg)
+      : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
+        hv8_avg_(hv8_avg) {}
+
+  convolve_fn_t h8_;       // Horizontal-only filter.
+  convolve_fn_t v8_;       // Vertical-only filter.
+  convolve_fn_t hv8_;      // Two-dimensional filter.
+  convolve_fn_t h8_avg_;   // As h8_, but averaged into dst.
+  convolve_fn_t v8_avg_;   // As v8_, but averaged into dst.
+  convolve_fn_t hv8_avg_;  // As hv8_, but averaged into dst.
+};
+
+// Reference 8-tap subpixel filter, slightly modified to fit into this test.
+#define VP9_FILTER_WEIGHT 128
+#define VP9_FILTER_SHIFT 7
+static uint8_t clip_pixel(int x) {  // Clamp x to the range [0, 255].
+  return x < 0 ? 0 :
+         x > 255 ? 255 :
+         x;
+}
+
+static void filter_block2d_8_c(const uint8_t *src_ptr,
+                               const unsigned int src_stride,
+                               const int16_t *HFilter,
+                               const int16_t *VFilter,
+                               uint8_t *dst_ptr,
+                               unsigned int dst_stride,
+                               unsigned int output_width,
+                               unsigned int output_height) {
+  // Between passes, we use an intermediate buffer whose height is extended to
+  // have enough horizontally filtered values as input for the vertical pass.
+  // This buffer is allocated to be big enough for the largest block type we
+  // support.
+  const int kInterp_Extend = 4;
+  const unsigned int intermediate_height =
+      (kInterp_Extend - 1) + output_height + kInterp_Extend;
+
+  /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
+   * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
+   *                                 + kInterp_Extend
+   *                               = 3 + 16 + 4
+   *                               = 23
+   * and filter_max_width = 16
+   */
+  uint8_t intermediate_buffer[23 * 16];
+  const int intermediate_next_stride = 1 - intermediate_height * output_width;
+
+  // Horizontal pass (src -> transposed intermediate).
+  {
+    uint8_t *output_ptr = intermediate_buffer;
+    const int src_next_row_stride = src_stride - output_width;
+    unsigned int i, j;
+    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
+    for (i = 0; i < intermediate_height; ++i) {
+      for (j = 0; j < output_width; ++j) {
+        // Apply filter...
+        int temp = ((int)src_ptr[0] * HFilter[0]) +
+                   ((int)src_ptr[1] * HFilter[1]) +
+                   ((int)src_ptr[2] * HFilter[2]) +
+                   ((int)src_ptr[3] * HFilter[3]) +
+                   ((int)src_ptr[4] * HFilter[4]) +
+                   ((int)src_ptr[5] * HFilter[5]) +
+                   ((int)src_ptr[6] * HFilter[6]) +
+                   ((int)src_ptr[7] * HFilter[7]) +
+                   (VP9_FILTER_WEIGHT >> 1);  // Rounding
+
+        // Normalize back to 0-255...
+        *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
+        ++src_ptr;
+        output_ptr += intermediate_height;  // Transposed: x moves a column.
+      }
+      src_ptr += src_next_row_stride;
+      output_ptr += intermediate_next_stride;  // Back to column 0, row i + 1.
+    }
+  }
+
+  // Vertical pass (transposed intermediate -> dst).
+  {
+    uint8_t *src_ptr = intermediate_buffer;
+    const int dst_next_row_stride = dst_stride - output_width;
+    unsigned int i, j;
+    for (i = 0; i < output_height; ++i) {
+      for (j = 0; j < output_width; ++j) {
+        // Apply filter...
+        int temp = ((int)src_ptr[0] * VFilter[0]) +
+                   ((int)src_ptr[1] * VFilter[1]) +
+                   ((int)src_ptr[2] * VFilter[2]) +
+                   ((int)src_ptr[3] * VFilter[3]) +
+                   ((int)src_ptr[4] * VFilter[4]) +
+                   ((int)src_ptr[5] * VFilter[5]) +
+                   ((int)src_ptr[6] * VFilter[6]) +
+                   ((int)src_ptr[7] * VFilter[7]) +
+                   (VP9_FILTER_WEIGHT >> 1);  // Rounding
+
+        // Normalize back to 0-255...
+        *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
+        src_ptr += intermediate_height;
+      }
+      src_ptr += intermediate_next_stride;
+      dst_ptr += dst_next_row_stride;
+    }
+  }
+}
+
+static void block2d_average_c(uint8_t *src,
+                              unsigned int src_stride,
+                              uint8_t *output_ptr,
+                              unsigned int output_stride,
+                              unsigned int output_width,
+                              unsigned int output_height) {
+  unsigned int i, j;  // Averages src into output_ptr, rounding halves up.
+  for (i = 0; i < output_height; ++i) {
+    for (j = 0; j < output_width; ++j) {
+      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
+    }
+    output_ptr += output_stride;
+  }
+}
+
+static void filter_average_block2d_8_c(const uint8_t *src_ptr,
+                                       const unsigned int src_stride,
+                                       const int16_t *HFilter,
+                                       const int16_t *VFilter,
+                                       uint8_t *dst_ptr,
+                                       unsigned int dst_stride,
+                                       unsigned int output_width,
+                                       unsigned int output_height) {
+  uint8_t tmp[16*16];  // Filtered block before averaging; stride is 16.
+
+  assert(output_width <= 16);
+  assert(output_height <= 16);
+  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 16,
+                     output_width, output_height);
+  block2d_average_c(tmp, 16, dst_ptr, dst_stride,
+                    output_width, output_height);
+}
+
+class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
+ protected:
+  static const int kOuterBlockSize = 32;  // Side of the padded buffers.
+  static const int kInputStride = kOuterBlockSize;
+  static const int kOutputStride = kOuterBlockSize;
+  static const int kMaxDimension = 16;
+
+  int Width() const { return GET_PARAM(0); }
+  int Height() const { return GET_PARAM(1); }
+  int BorderLeft() const { return (kOuterBlockSize - Width()) / 2; }
+  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
+
+  bool IsIndexInBorder(int i) const {  // True outside the centered block.
+    return (i < BorderTop() * kOuterBlockSize ||
+            i >= (BorderTop() + Height()) * kOuterBlockSize ||
+            i % kOuterBlockSize < BorderLeft() ||
+            i % kOuterBlockSize >= (BorderLeft() + Width()));
+  }
+
+  virtual void SetUp() {
+    UUT_ = GET_PARAM(2);
+    memset(input_, 0, sizeof(input_));
+    /* Set up guard blocks for an inner block centered in the outer block */
+    for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i) {
+      if (IsIndexInBorder(i))
+        output_[i] = 255;
+      else
+        output_[i] = 0;
+    }
+
+    ::libvpx_test::ACMRandom prng;
+    for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i)
+      input_[i] = prng.Rand8();
+  }
+
+  void CheckGuardBlocks() {  // Every border byte must still hold 255.
+    for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i) {
+      if (IsIndexInBorder(i))
+        EXPECT_EQ(255, output_[i]);
+    }
+  }
+
+  uint8_t* input() {  // Top-left pixel of the centered input block.
+    return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
+  }
+
+  uint8_t* output() {  // Top-left pixel of the centered output block.
+    return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
+  }
+
+  const ConvolveFunctions* UUT_;
+  uint8_t input_[kOuterBlockSize * kOuterBlockSize];
+  uint8_t output_[kOuterBlockSize * kOuterBlockSize];
+};
+
+TEST_P(ConvolveTest, GuardBlocks) {
+  CheckGuardBlocks();
+}
+
+TEST_P(ConvolveTest, CopyHoriz) {
+  uint8_t* const in = input();
+  uint8_t* const out = output();
+  const int16_t filter8[8] = {0, 0, 0, 128, 0, 0, 0, 0};  // Identity kernel.
+
+  REGISTER_STATE_CHECK(
+      UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
+                Width(), Height()));
+
+  CheckGuardBlocks();
+
+  for (int y = 0; y < Height(); ++y)
+    for (int x = 0; x < Width(); ++x)
+      ASSERT_EQ(in[y * kInputStride + x], out[y * kOutputStride + x])
+          << "(" << x << "," << y << ")";
+}
+
+TEST_P(ConvolveTest, CopyVert) {
+  uint8_t* const in = input();
+  uint8_t* const out = output();
+  const int16_t filter8[8] = {0, 0, 0, 128, 0, 0, 0, 0};  // Identity kernel.
+
+  REGISTER_STATE_CHECK(
+      UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
+                Width(), Height()));
+
+  CheckGuardBlocks();
+
+  for (int y = 0; y < Height(); ++y)
+    for (int x = 0; x < Width(); ++x)
+      ASSERT_EQ(in[y * kInputStride + x], out[y * kOutputStride + x])
+          << "(" << x << "," << y << ")";
+}
+
+TEST_P(ConvolveTest, Copy2D) {
+  uint8_t* const in = input();
+  uint8_t* const out = output();
+  const int16_t filter8[8] = {0, 0, 0, 128, 0, 0, 0, 0};  // Identity kernel.
+
+  REGISTER_STATE_CHECK(
+      UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
+                 Width(), Height()));
+
+  CheckGuardBlocks();
+
+  for (int y = 0; y < Height(); ++y)
+    for (int x = 0; x < Width(); ++x)
+      ASSERT_EQ(in[y * kInputStride + x], out[y * kOutputStride + x])
+          << "(" << x << "," << y << ")";
+}
+
+TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
+  uint8_t* const in = input();
+  uint8_t* const out = output();
+  uint8_t ref[kOutputStride * kMaxDimension];
+
+  const int16_t filters[][8] = {
+    { 0, 0, 0, 128, 0, 0, 0, 0},
+    { 0, 1, -5, 126, 8, -3, 1, 0},
+    { -1, 3, -10, 122, 18, -6, 2, 0},
+    { -1, 4, -13, 118, 27, -9, 3, -1},
+    { -1, 4, -16, 112, 37, -11, 4, -1},
+    { -1, 5, -18, 105, 48, -14, 4, -1},
+    { -1, 5, -19, 97, 58, -16, 5, -1},
+    { -1, 6, -19, 88, 68, -18, 5, -1},
+    { -1, 6, -19, 78, 78, -19, 6, -1},
+    { -1, 5, -18, 68, 88, -19, 6, -1},
+    { -1, 5, -16, 58, 97, -19, 5, -1},
+    { -1, 4, -14, 48, 105, -18, 5, -1},
+    { -1, 4, -11, 37, 112, -16, 4, -1},
+    { -1, 3, -9, 27, 118, -13, 4, -1},
+    { 0, 2, -6, 18, 122, -10, 3, -1},
+    { 0, 1, -3, 8, 126, -5, 1, 0}
+  };
+
+  const int kNumFilters = sizeof(filters) / sizeof(filters[0]);
+
+  for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+    for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+      filter_block2d_8_c(in, kInputStride,
+                         filters[filter_x], filters[filter_y],
+                         ref, kOutputStride,
+                         Width(), Height());
+
+      if (filter_x && filter_y)  // Index 0 is the identity kernel.
+        REGISTER_STATE_CHECK(
+            UUT_->hv8_(in, kInputStride, out, kOutputStride,
+                       filters[filter_x], 16, filters[filter_y], 16,
+                       Width(), Height()));
+      else if (filter_y)
+        REGISTER_STATE_CHECK(
+            UUT_->v8_(in, kInputStride, out, kOutputStride,
+                      filters[filter_x], 16, filters[filter_y], 16,
+                      Width(), Height()));
+      else
+        REGISTER_STATE_CHECK(
+            UUT_->h8_(in, kInputStride, out, kOutputStride,
+                      filters[filter_x], 16, filters[filter_y], 16,
+                      Width(), Height()));
+
+      CheckGuardBlocks();
+
+      for (int y = 0; y < Height(); ++y)
+        for (int x = 0; x < Width(); ++x)
+          ASSERT_EQ(ref[y * kOutputStride + x], out[y * kOutputStride + x])
+              << "mismatch at (" << x << "," << y << "), "
+              << "filters (" << filter_x << "," << filter_y << ")";
+    }
+  }
+}
+
+TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
+  uint8_t* const in = input();
+  uint8_t* const out = output();
+  uint8_t ref[kOutputStride * kMaxDimension];
+
+  // Populate ref and out with the same random data
+  ::libvpx_test::ACMRandom prng;
+  for (int y = 0; y < Height(); ++y) {
+    for (int x = 0; x < Width(); ++x) {
+      const uint8_t r = prng.Rand8();
+
+      out[y * kOutputStride + x] = r;
+      ref[y * kOutputStride + x] = r;
+    }
+  }
+
+  const int16_t filters[][8] = {
+    { 0, 0, 0, 128, 0, 0, 0, 0},
+    { 0, 1, -5, 126, 8, -3, 1, 0},
+    { -1, 3, -10, 122, 18, -6, 2, 0},
+    { -1, 4, -13, 118, 27, -9, 3, -1},
+    { -1, 4, -16, 112, 37, -11, 4, -1},
+    { -1, 5, -18, 105, 48, -14, 4, -1},
+    { -1, 5, -19, 97, 58, -16, 5, -1},
+    { -1, 6, -19, 88, 68, -18, 5, -1},
+    { -1, 6, -19, 78, 78, -19, 6, -1},
+    { -1, 5, -18, 68, 88, -19, 6, -1},
+    { -1, 5, -16, 58, 97, -19, 5, -1},
+    { -1, 4, -14, 48, 105, -18, 5, -1},
+    { -1, 4, -11, 37, 112, -16, 4, -1},
+    { -1, 3, -9, 27, 118, -13, 4, -1},
+    { 0, 2, -6, 18, 122, -10, 3, -1},
+    { 0, 1, -3, 8, 126, -5, 1, 0}
+  };
+
+  const int kNumFilters = sizeof(filters) / sizeof(filters[0]);
+
+  for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+    for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+      filter_average_block2d_8_c(in, kInputStride,
+                                 filters[filter_x], filters[filter_y],
+                                 ref, kOutputStride,
+                                 Width(), Height());
+
+      if (filter_x && filter_y)  // Index 0 is the identity kernel.
+        REGISTER_STATE_CHECK(
+            UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
+                           filters[filter_x], 16, filters[filter_y], 16,
+                           Width(), Height()));
+      else if (filter_y)
+        REGISTER_STATE_CHECK(
+            UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
+                          filters[filter_x], 16, filters[filter_y], 16,
+                          Width(), Height()));
+      else
+        REGISTER_STATE_CHECK(
+            UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
+                          filters[filter_x], 16, filters[filter_y], 16,
+                          Width(), Height()));
+
+      CheckGuardBlocks();
+
+      for (int y = 0; y < Height(); ++y)
+        for (int x = 0; x < Width(); ++x)
+          ASSERT_EQ(ref[y * kOutputStride + x], out[y * kOutputStride + x])
+              << "mismatch at (" << x << "," << y << "), "
+              << "filters (" << filter_x << "," << filter_y << ")";
+    }
+  }
+}
+
+TEST_P(ConvolveTest, ChangeFilterWorks) {
+  uint8_t* const in = input();
+  uint8_t* const out = output();
+
+  const int16_t filters[][8] = {  // The 128 tap moves one slot per row.
+    { 0, 0, 0, 0, 0, 0, 0, 128},
+    { 0, 0, 0, 0, 0, 0, 128},
+    { 0, 0, 0, 0, 0, 128},
+    { 0, 0, 0, 0, 128},
+    { 0, 0, 0, 128},
+    { 0, 0, 128},
+    { 0, 128},
+    { 128},
+    { 0, 0, 0, 0, 0, 0, 0, 128},
+    { 0, 0, 0, 0, 0, 0, 128},
+    { 0, 0, 0, 0, 0, 128},
+    { 0, 0, 0, 0, 128},
+    { 0, 0, 0, 128},
+    { 0, 0, 128},
+    { 0, 128},
+    { 128},
+    { 0, 0, 0, 0, 0, 0, 0, 128},
+    { 0, 0, 0, 0, 0, 0, 128},
+    { 0, 0, 0, 0, 0, 128},
+    { 0, 0, 0, 0, 128},
+    { 0, 0, 0, 128},
+    { 0, 0, 128},
+    { 0, 128},
+    { 128},
+  };
+
+  REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
+                                 filters[0], 17, filters[4], 16,
+                                 Width(), Height()));
+
+  for (int x = 0; x < (Width() > 4 ? 8 : 4); ++x) {
+    ASSERT_EQ(in[4], out[x]) << "x == " << x;
+  }
+
+  REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
+                                 filters[4], 16, filters[0], 17,
+                                 Width(), Height()));
+
+  for (int y = 0; y < (Height() > 4 ? 8 : 4); ++y) {
+    ASSERT_EQ(in[4 * kInputStride], out[y * kOutputStride]) << "y == " << y;
+  }
+
+  REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
+                                  filters[0], 17, filters[0], 17,
+                                  Width(), Height()));
+
+  for (int y = 0; y < (Height() > 4 ? 8 : 4); ++y) {
+    for (int x = 0; x < (Width() > 4 ? 8 : 4); ++x) {
+      ASSERT_EQ(in[4 * kInputStride + 4], out[y * kOutputStride + x])
+          << "x == " << x << ", y == " << y;
+    }
+  }
+}
+
+
+using std::tr1::make_tuple;
+
+const ConvolveFunctions convolve8_2d_only_c(
+    vp9_convolve8_c, vp9_convolve8_avg_c,
+    vp9_convolve8_c, vp9_convolve8_avg_c,
+    vp9_convolve8_c, vp9_convolve8_avg_c);
+
+const ConvolveFunctions convolve8_c(
+    vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
+    vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
+    vp9_convolve8_c, vp9_convolve8_avg_c);
+
+INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
+    make_tuple(4, 4, &convolve8_2d_only_c),
+    make_tuple(8, 4, &convolve8_2d_only_c),
+    make_tuple(8, 8, &convolve8_2d_only_c),
+    make_tuple(16, 16, &convolve8_2d_only_c),
+    make_tuple(4, 4, &convolve8_c),
+    make_tuple(8, 4, &convolve8_c),
+    make_tuple(8, 8, &convolve8_c),
+    make_tuple(16, 16, &convolve8_c)));
+}  // namespace
diff --git a/test/test.mk b/test/test.mk
index f275a47f2..46b055e23 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -68,6 +68,7 @@ LIBVPX_TEST_SRCS-yes += vp9_boolcoder_test.cc
 LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
 endif
 
+LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
 #LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
new file mode 100644
index 000000000..ed188c3f2
--- /dev/null
+++ b/vp9/common/vp9_convolve.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_common.h"
+#include "vpx/vpx_integer.h"
+
+#define VP9_FILTER_WEIGHT 128
+#define VP9_FILTER_SHIFT 7
+#define ALIGN_FILTERS_256 0
+
+/* Assume a bank of 16 filters to choose from. There are three options for
+ * handling filter wrapping behavior, since we want to be able to pick which
+ * filter to start with. We could either:
+ *
+ * 1) make filter_ a pointer to the base of the filter array, and then add an
+ *    additional offset parameter, to choose the starting filter.
+ * 2) use a pointer to 2 periods worth of filters, so that even if the original
+ *    phase offset is at 15/16, we'll have valid data to read. The filter
+ *    tables become [32][8], and the second half is duplicated.
+ * 3) fix the alignment of the filter tables, so that we know the 0/16 is
+ *    always 256 byte aligned.
+ *
+ * Options 2 and 3 are likely preferable, as they avoid an extra 2
+ * parameters, and switching between them is trivial.
+ */
+static void convolve_horiz_c(const uint8_t *src, int src_stride,
+                             uint8_t *dst, int dst_stride,
+                             const int16_t *filter_x0, int x_step_q4,
+                             const int16_t *filter_y, int y_step_q4,
+                             int w, int h, int taps) {
+  int x, y, k, sum;  /* filter_y and y_step_q4 are not used in this pass */
+  const int16_t *filter_x_base = filter_x0;
+
+#if ALIGN_FILTERS_256
+  filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
+#endif
+
+  /* Back src up taps / 2 - 1 pixels so the taps straddle the output */
+  src -= taps / 2 - 1;
+
+  for (y = 0; y < h; ++y) {
+    /* Every row restarts from the caller-supplied filter */
+    const int16_t *filter_x = filter_x0;
+
+    /* Initial phase offset (0 unless ALIGN_FILTERS_256 moves the base) */
+    int x_q4 = (filter_x - filter_x_base) / taps;
+
+    for (x = 0; x < w; ++x) {
+      /* Whole-pixel part of the position, which is kept in 1/16 units */
+      int src_x = x_q4 >> 4;
+
+      for (sum = 0, k = 0; k < taps; ++k) {
+        sum += src[src_x + k] * filter_x[k];
+      }
+      sum += (VP9_FILTER_WEIGHT >> 1);  /* Round to nearest */
+      dst[x] = clip_pixel(sum >> VP9_FILTER_SHIFT);
+
+      /* Advance the position; its low 4 bits pick the next filter */
+      x_q4 += x_step_q4;
+      filter_x = filter_x_base + (x_q4 & 0xf) * taps;
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+static void convolve_avg_horiz_c(const uint8_t *src, int src_stride,
+                                 uint8_t *dst, int dst_stride,
+                                 const int16_t *filter_x0, int x_step_q4,
+                                 const int16_t *filter_y, int y_step_q4,
+                                 int w, int h, int taps) {
+  int x, y, k, sum;  /* filter_y and y_step_q4 are not used in this pass */
+  const int16_t *filter_x_base = filter_x0;
+
+#if ALIGN_FILTERS_256
+  filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
+#endif
+
+  /* Back src up taps / 2 - 1 pixels so the taps straddle the output */
+  src -= taps / 2 - 1;
+
+  for (y = 0; y < h; ++y) {
+    /* Every row restarts from the caller-supplied filter */
+    const int16_t *filter_x = filter_x0;
+
+    /* Initial phase offset (0 unless ALIGN_FILTERS_256 moves the base) */
+    int x_q4 = (filter_x - filter_x_base) / taps;
+
+    for (x = 0; x < w; ++x) {
+      /* Whole-pixel part of the position, which is kept in 1/16 units */
+      int src_x = x_q4 >> 4;
+
+      for (sum = 0, k = 0; k < taps; ++k) {
+        sum += src[src_x + k] * filter_x[k];
+      }
+      sum += (VP9_FILTER_WEIGHT >> 1);  /* Round to nearest */
+      dst[x] = (dst[x] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
+
+      /* Advance the position; its low 4 bits pick the next filter */
+      x_q4 += x_step_q4;
+      filter_x = filter_x_base + (x_q4 & 0xf) * taps;
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+static void convolve_vert_c(const uint8_t *src, int src_stride,
+                            uint8_t *dst, int dst_stride,
+                            const int16_t *filter_x, int x_step_q4,
+                            const int16_t *filter_y0, int y_step_q4,
+                            int w, int h, int taps) {
+  int x, y, k, sum;  /* filter_x and x_step_q4 are not used in this pass */
+
+  const int16_t *filter_y_base = filter_y0;
+
+#if ALIGN_FILTERS_256
+  filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
+#endif
+
+  /* Back src up taps / 2 - 1 rows so the taps straddle the output */
+  src -= src_stride * (taps / 2 - 1);
+  for (x = 0; x < w; ++x) {
+    /* Every column restarts from the caller-supplied filter */
+    const int16_t *filter_y = filter_y0;
+
+    /* Initial phase offset (0 unless ALIGN_FILTERS_256 moves the base) */
+    int y_q4 = (filter_y - filter_y_base) / taps;
+
+    for (y = 0; y < h; ++y) {
+      /* Whole-pixel part of the position, which is kept in 1/16 units */
+      int src_y = y_q4 >> 4;
+
+      for (sum = 0, k = 0; k < taps; ++k) {
+        sum += src[(src_y + k) * src_stride] * filter_y[k];
+      }
+      sum += (VP9_FILTER_WEIGHT >> 1);  /* Round to nearest */
+      dst[y * dst_stride] = clip_pixel(sum >> VP9_FILTER_SHIFT);
+
+      /* Advance the position; its low 4 bits pick the next filter */
+      y_q4 += y_step_q4;
+      filter_y = filter_y_base + (y_q4 & 0xf) * taps;
+    }
+    ++src;
+    ++dst;
+  }
+}
+
+static void convolve_avg_vert_c(const uint8_t *src, int src_stride,
+                                uint8_t *dst, int dst_stride,
+                                const int16_t *filter_x, int x_step_q4,
+                                const int16_t *filter_y0, int y_step_q4,
+                                int w, int h, int taps) {
+  int x, y, k, sum;  /* filter_x and x_step_q4 are not used in this pass */
+
+  const int16_t *filter_y_base = filter_y0;
+
+#if ALIGN_FILTERS_256
+  filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
+#endif
+
+  /* Back src up taps / 2 - 1 rows so the taps straddle the output */
+  src -= src_stride * (taps / 2 - 1);
+  for (x = 0; x < w; ++x) {
+    /* Every column restarts from the caller-supplied filter */
+    const int16_t *filter_y = filter_y0;
+
+    /* Initial phase offset (0 unless ALIGN_FILTERS_256 moves the base) */
+    int y_q4 = (filter_y - filter_y_base) / taps;
+
+    for (y = 0; y < h; ++y) {
+      /* Whole-pixel part of the position, which is kept in 1/16 units */
+      int src_y = y_q4 >> 4;
+
+      for (sum = 0, k = 0; k < taps; ++k) {
+        sum += src[(src_y + k) * src_stride] * filter_y[k];
+      }
+      sum += (VP9_FILTER_WEIGHT >> 1);  /* Round to nearest */
+      dst[y * dst_stride] =
+          (dst[y * dst_stride] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
+
+      /* Advance the position; its low 4 bits pick the next filter */
+      y_q4 += y_step_q4;
+      filter_y = filter_y_base + (y_q4 & 0xf) * taps;
+    }
+    ++src;
+    ++dst;
+  }
+}
+
+static void convolve_c(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int x_step_q4,
+                       const int16_t *filter_y, int y_step_q4,
+                       int w, int h, int taps) {
+  /* Fixed size intermediate buffer limits w, h, taps (steps are unchecked). */
+  uint8_t temp[16 * 23];  /* Stride 16; 23 rows = 16 + 8 - 1 */
+  assert(w <= 16);
+  assert(h <= 16);
+  assert(taps <= 8);
+
+  convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
+                   temp, 16,
+                   filter_x, x_step_q4, filter_y, y_step_q4,
+                   w, h + taps - 1, taps);  /* Extra rows feed the 2nd pass */
+  convolve_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
+                  filter_x, x_step_q4, filter_y, y_step_q4,
+                  w, h, taps);
+}
+
+static void convolve_avg_c(const uint8_t *src, int src_stride,
+                           uint8_t *dst, int dst_stride,
+                           const int16_t *filter_x, int x_step_q4,
+                           const int16_t *filter_y, int y_step_q4,
+                           int w, int h, int taps) {
+  /* Fixed size intermediate buffer limits w, h, taps (steps are unchecked). */
+  uint8_t temp[16 * 23];  /* Stride 16; 23 rows = 16 + 8 - 1 */
+  assert(w <= 16);
+  assert(h <= 16);
+  assert(taps <= 8);
+
+  convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
+                   temp, 16,
+                   filter_x, x_step_q4, filter_y, y_step_q4,
+                   w, h + taps - 1, taps);  /* Extra rows feed the 2nd pass */
+  convolve_avg_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
+                      filter_x, x_step_q4, filter_y, y_step_q4,
+                      w, h, taps);  /* Only this pass averages into dst */
+}
+
+void vp9_convolve8_horiz_c(const uint8_t *src, int src_stride,
+                           uint8_t *dst, int dst_stride,
+                           const int16_t *filter_x, int x_step_q4,
+                           const int16_t *filter_y, int y_step_q4,
+                           int w, int h) {
+  convolve_horiz_c(src, src_stride, dst, dst_stride,
+                   filter_x, x_step_q4, filter_y, y_step_q4,
+                   w, h, 8);  /* 8 taps */
+}
+
+void vp9_convolve8_avg_horiz_c(const uint8_t *src, int src_stride,
+                               uint8_t *dst, int dst_stride,
+                               const int16_t *filter_x, int x_step_q4,
+                               const int16_t *filter_y, int y_step_q4,
+                               int w, int h) {
+  convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
+                       filter_x, x_step_q4, filter_y, y_step_q4,
+                       w, h, 8);  /* 8 taps */
+}
+
+void vp9_convolve8_vert_c(const uint8_t *src, int src_stride,
+                          uint8_t *dst, int dst_stride,
+                          const int16_t *filter_x, int x_step_q4,
+                          const int16_t *filter_y, int y_step_q4,
+                          int w, int h) {
+  convolve_vert_c(src, src_stride, dst, dst_stride,
+                  filter_x, x_step_q4, filter_y, y_step_q4,
+                  w, h, 8);  /* 8 taps */
+}
+
+void vp9_convolve8_avg_vert_c(const uint8_t *src, int src_stride,
+                              uint8_t *dst, int dst_stride,
+                              const int16_t *filter_x, int x_step_q4,
+                              const int16_t *filter_y, int y_step_q4,
+                              int w, int h) {
+  convolve_avg_vert_c(src, src_stride, dst, dst_stride,
+                      filter_x, x_step_q4, filter_y, y_step_q4,
+                      w, h, 8);  /* 8 taps */
+}
+
+void vp9_convolve8_c(const uint8_t *src, int src_stride,
+                     uint8_t *dst, int dst_stride,
+                     const int16_t *filter_x, int x_step_q4,
+                     const int16_t *filter_y, int y_step_q4,
+                     int w, int h) {
+  convolve_c(src, src_stride, dst, dst_stride,
+             filter_x, x_step_q4, filter_y, y_step_q4,
+             w, h, 8);  /* 8 taps */
+}
+
+void vp9_convolve8_avg_c(const uint8_t *src, int src_stride,
+                         uint8_t *dst, int dst_stride,
+                         const int16_t *filter_x, int x_step_q4,
+                         const int16_t *filter_y, int y_step_q4,
+                         int w, int h) {
+  convolve_avg_c(src, src_stride, dst, dst_stride,
+                 filter_x, x_step_q4, filter_y, y_step_q4,
+                 w, h, 8);  /* 8 taps */
+}
diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h
new file mode 100644
index 000000000..46c935ab7
--- /dev/null
+++ b/vp9/common/vp9_convolve.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP9_COMMON_CONVOLVE_H_
+#define VP9_COMMON_CONVOLVE_H_
+
+#include "vpx/vpx_integer.h"
+
+typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
+                              uint8_t *dst, int dst_stride,
+                              const int16_t *filter_x, int x_step_q4,
+                              const int16_t *filter_y, int y_step_q4,
+                              int w, int h);
+
+// Not a convolution, a block copy conforming to the convolution prototype
+void vp9_convolve_copy(const uint8_t *src, int src_stride,
+                       uint8_t *dst, int dst_stride,
+                       const int16_t *filter_x, int x_step_q4,
+                       const int16_t *filter_y, int y_step_q4,
+                       int w, int h);
+
+// Not a convolution, a block average conforming to the convolution prototype
+void vp9_convolve_avg(const uint8_t *src, int src_stride,
+                      uint8_t *dst, int dst_stride,
+                      const int16_t *filter_x, int x_step_q4,
+                      const int16_t *filter_y, int y_step_q4,
+                      int w, int h);
+
+struct subpix_fn_table {
+  convolve_fn_t predict[2][2][2];  // horiz, vert, avg
+  const int16_t (*filter_x)[8];
+  const int16_t (*filter_y)[8];
+  int x_step_q4;
+  int y_step_q4;
+};
+
+#endif  // VP9_COMMON_CONVOLVE_H_
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 39af2080a..762dd75c0 100644
--- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -269,6 +269,24 @@ specialize vp9_sub_pixel_variance16x2 sse2 # # Sub Pixel Filters # +prototype void vp9_convolve8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8 + +prototype void vp9_convolve8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_horiz + +prototype void vp9_convolve8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_vert + +prototype void vp9_convolve8_avg "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_avg + +prototype void vp9_convolve8_avg_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_avg_horiz + +prototype void vp9_convolve8_avg_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" +specialize vp9_convolve8_avg_vert + prototype void vp9_eighttap_predict16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch" specialize vp9_eighttap_predict16x16 diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 0d208e9a3..d1805be62 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -16,6 +16,8 @@ VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c VP9_COMMON_SRCS-yes += common/vp9_asm_com_offsets.c VP9_COMMON_SRCS-yes += 
common/vp9_blockd.c VP9_COMMON_SRCS-yes += common/vp9_coefupdateprobs.h +VP9_COMMON_SRCS-yes += common/vp9_convolve.c +VP9_COMMON_SRCS-yes += common/vp9_convolve.h VP9_COMMON_SRCS-yes += common/vp9_debugmodes.c VP9_COMMON_SRCS-yes += common/vp9_default_coef_probs.h VP9_COMMON_SRCS-yes += common/vp9_entropy.c