
Cherry-Picked the following commits: 0defd8f Changed "WebM" to "AOMedia" & "webm" to "aomedia" 54e6676 Replace "VPx" by "AVx" 5082a36 Change "Vpx" to "Avx" 7df44f1 Replace "Vp9" w/ "Av1" 967f722 Remove kVp9CodecId 828f30c Change "Vp8" to "AOM" 030b5ff AUTHORS regenerated 2524cae Add ref-mv experimental flag 016762b Change copyright notice to AOMedia form 81e5526 Replace vp9 w/ av1 9b94565 Add missing files fa8ca9f Change "vp9" to "av1" ec838b7 Convert "vp8" to "aom" 80edfa0 Change "VP9" to "AV1" d1a11fb Change "vp8" to "aom" 7b58251 Point to WebM test data dd1a5c8 Replace "VP8" with "AOM" ff00fc0 Change "VPX" to "AOM" 01dee0b Change "vp10" to "av1" in source code cebe6f0 Convert "vpx" to "aom" 17b0567 rename vp10*.mk to av1_*.mk fe5f8a8 rename files vp10_* to av1_* Change-Id: I6fc3d18eb11fc171e46140c836ad5339cf6c9419
354 lines
12 KiB
C
354 lines
12 KiB
C
#include <assert.h>
|
|
#include <string.h>
|
|
|
|
#include "./av1_rtcd.h"
|
|
#include "av1/common/av1_convolve.h"
|
|
#include "av1/common/filter.h"
|
|
#include "aom_dsp/aom_dsp_common.h"
|
|
#include "aom_ports/mem.h"
|
|
|
|
// Upper bounds that av1_convolve() / av1_highbd_convolve() assert on and use
// to size their on-stack intermediate buffer.
// Largest block width accepted (asserted against w); also the row stride of
// the intermediate buffer.
#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
|
|
// Largest block height accepted (asserted against h).
#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
|
|
// Largest x_step_q4 / y_step_q4 accepted.
#define MAX_STEP (32)
|
|
// Largest filter tap count accepted (asserted against filter_params.taps).
#define MAX_FILTER_TAP (12)
|
|
|
|
void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
|
int dst_stride, int w, int h,
|
|
const InterpFilterParams filter_params,
|
|
const int subpel_x_q4, int x_step_q4, int avg) {
|
|
int x, y;
|
|
int filter_size = filter_params.taps;
|
|
src -= filter_size / 2 - 1;
|
|
for (y = 0; y < h; ++y) {
|
|
int x_q4 = subpel_x_q4;
|
|
for (x = 0; x < w; ++x) {
|
|
const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
|
|
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
|
filter_params, x_q4 & SUBPEL_MASK);
|
|
int k, sum = 0;
|
|
for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
|
|
if (avg) {
|
|
dst[x] = ROUND_POWER_OF_TWO(
|
|
dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
|
|
} else {
|
|
dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
|
|
}
|
|
x_q4 += x_step_q4;
|
|
}
|
|
src += src_stride;
|
|
dst += dst_stride;
|
|
}
|
|
}
|
|
|
|
void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
|
int dst_stride, int w, int h,
|
|
const InterpFilterParams filter_params,
|
|
const int subpel_y_q4, int y_step_q4, int avg) {
|
|
int x, y;
|
|
int filter_size = filter_params.taps;
|
|
src -= src_stride * (filter_size / 2 - 1);
|
|
|
|
for (x = 0; x < w; ++x) {
|
|
int y_q4 = subpel_y_q4;
|
|
for (y = 0; y < h; ++y) {
|
|
const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
|
|
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
|
filter_params, y_q4 & SUBPEL_MASK);
|
|
int k, sum = 0;
|
|
for (k = 0; k < filter_size; ++k)
|
|
sum += src_y[k * src_stride] * y_filter[k];
|
|
if (avg) {
|
|
dst[y * dst_stride] = ROUND_POWER_OF_TWO(
|
|
dst[y * dst_stride] +
|
|
clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
|
|
1);
|
|
} else {
|
|
dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
|
|
}
|
|
y_q4 += y_step_q4;
|
|
}
|
|
++src;
|
|
++dst;
|
|
}
|
|
}
|
|
|
|
// Unfiltered w x h block transfer from src to dst.
//
// With avg == 0 each row is copied with memcpy(). Otherwise every destination
// sample is replaced by the rounded mean of itself and the matching source
// sample (passed through clip_pixel()).
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h, int avg) {
  int row;

  if (avg != 0) {
    for (row = 0; row < h; ++row) {
      int col;
      for (col = 0; col < w; ++col) {
        dst[col] = clip_pixel(ROUND_POWER_OF_TWO(dst[col] + src[col], 1));
      }
      src += src_stride;
      dst += dst_stride;
    }
    return;
  }

  for (row = 0; row < h; ++row) {
    memcpy(dst, src, w);
    src += src_stride;
    dst += dst_stride;
  }
}
|
|
|
|
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
|
|
int dst_stride, int w, int h,
|
|
#if CONFIG_DUAL_FILTER
|
|
const INTERP_FILTER *interp_filter,
|
|
#else
|
|
const INTERP_FILTER interp_filter,
|
|
#endif
|
|
const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
|
|
int y_step_q4, int ref_idx) {
|
|
int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
|
|
int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
|
|
|
|
assert(w <= MAX_BLOCK_WIDTH);
|
|
assert(h <= MAX_BLOCK_HEIGHT);
|
|
assert(y_step_q4 <= MAX_STEP);
|
|
assert(x_step_q4 <= MAX_STEP);
|
|
|
|
if (ignore_horiz && ignore_vert) {
|
|
convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx);
|
|
} else if (ignore_vert) {
|
|
#if CONFIG_DUAL_FILTER
|
|
InterpFilterParams filter_params =
|
|
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
|
|
#else
|
|
InterpFilterParams filter_params =
|
|
av1_get_interp_filter_params(interp_filter);
|
|
#endif
|
|
assert(filter_params.taps <= MAX_FILTER_TAP);
|
|
av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
|
|
subpel_x_q4, x_step_q4, ref_idx);
|
|
} else if (ignore_horiz) {
|
|
#if CONFIG_DUAL_FILTER
|
|
InterpFilterParams filter_params =
|
|
av1_get_interp_filter_params(interp_filter[2 * ref_idx]);
|
|
#else
|
|
InterpFilterParams filter_params =
|
|
av1_get_interp_filter_params(interp_filter);
|
|
#endif
|
|
assert(filter_params.taps <= MAX_FILTER_TAP);
|
|
av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
|
|
subpel_y_q4, y_step_q4, ref_idx);
|
|
} else {
|
|
// temp's size is set to (maximum possible intermediate_height) *
|
|
// MAX_BLOCK_WIDTH
|
|
uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
|
|
MAX_FILTER_TAP) *
|
|
MAX_BLOCK_WIDTH];
|
|
int temp_stride = MAX_BLOCK_WIDTH;
|
|
#if CONFIG_DUAL_FILTER
|
|
InterpFilterParams filter_params_x =
|
|
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
|
|
InterpFilterParams filter_params_y =
|
|
av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
|
|
InterpFilterParams filter_params = filter_params_x;
|
|
|
|
// The filter size implies the required number of reference pixels for
|
|
// the second stage filtering. It is possible that the two directions
|
|
// require different filter sizes.
|
|
int filter_size = filter_params_y.taps;
|
|
#else
|
|
InterpFilterParams filter_params =
|
|
av1_get_interp_filter_params(interp_filter);
|
|
int filter_size = filter_params.taps;
|
|
#endif
|
|
int intermediate_height =
|
|
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
|
|
|
|
assert(filter_params.taps <= MAX_FILTER_TAP);
|
|
|
|
av1_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
|
|
temp, temp_stride, w, intermediate_height, filter_params,
|
|
subpel_x_q4, x_step_q4, 0);
|
|
|
|
#if CONFIG_DUAL_FILTER
|
|
filter_params = filter_params_y;
|
|
#else
|
|
filter_params = av1_get_interp_filter_params(interp_filter);
|
|
#endif
|
|
filter_size = filter_params.taps;
|
|
assert(filter_params.taps <= MAX_FILTER_TAP);
|
|
|
|
av1_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
|
|
dst, dst_stride, w, h, filter_params, subpel_y_q4,
|
|
y_step_q4, ref_idx);
|
|
}
|
|
}
|
|
|
|
#if CONFIG_AOM_HIGHBITDEPTH
|
|
void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
|
|
uint16_t *dst, int dst_stride, int w, int h,
|
|
const InterpFilterParams filter_params,
|
|
const int subpel_x_q4, int x_step_q4, int avg,
|
|
int bd) {
|
|
int x, y;
|
|
int filter_size = filter_params.taps;
|
|
src -= filter_size / 2 - 1;
|
|
for (y = 0; y < h; ++y) {
|
|
int x_q4 = subpel_x_q4;
|
|
for (x = 0; x < w; ++x) {
|
|
const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
|
|
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
|
filter_params, x_q4 & SUBPEL_MASK);
|
|
int k, sum = 0;
|
|
for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
|
|
if (avg)
|
|
dst[x] = ROUND_POWER_OF_TWO(
|
|
dst[x] +
|
|
clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
|
|
1);
|
|
else
|
|
dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
|
x_q4 += x_step_q4;
|
|
}
|
|
src += src_stride;
|
|
dst += dst_stride;
|
|
}
|
|
}
|
|
|
|
void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
|
|
uint16_t *dst, int dst_stride, int w, int h,
|
|
const InterpFilterParams filter_params,
|
|
const int subpel_y_q4, int y_step_q4, int avg,
|
|
int bd) {
|
|
int x, y;
|
|
int filter_size = filter_params.taps;
|
|
src -= src_stride * (filter_size / 2 - 1);
|
|
|
|
for (x = 0; x < w; ++x) {
|
|
int y_q4 = subpel_y_q4;
|
|
for (y = 0; y < h; ++y) {
|
|
const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
|
|
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
|
filter_params, y_q4 & SUBPEL_MASK);
|
|
int k, sum = 0;
|
|
for (k = 0; k < filter_size; ++k)
|
|
sum += src_y[k * src_stride] * y_filter[k];
|
|
if (avg) {
|
|
dst[y * dst_stride] = ROUND_POWER_OF_TWO(
|
|
dst[y * dst_stride] +
|
|
clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
|
|
1);
|
|
} else {
|
|
dst[y * dst_stride] =
|
|
clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
|
|
}
|
|
y_q4 += y_step_q4;
|
|
}
|
|
++src;
|
|
++dst;
|
|
}
|
|
}
|
|
|
|
// Unfiltered w x h block transfer between 16-bit sample buffers.
//
// With avg == 0 each row is copied with memcpy() (w samples of sizeof(*src)
// bytes). Otherwise every destination sample is replaced by the rounded mean
// of itself and the matching source sample, passed through
// clip_pixel_highbd() for bit depth bd.
static void highbd_convolve_copy(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 int avg, int bd) {
  int row;

  if (avg != 0) {
    for (row = 0; row < h; ++row) {
      int col;
      for (col = 0; col < w; ++col) {
        dst[col] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(dst[col] + src[col], 1), bd);
      }
      src += src_stride;
      dst += dst_stride;
    }
    return;
  }

  for (row = 0; row < h; ++row) {
    memcpy(dst, src, w * sizeof(*src));
    src += src_stride;
    dst += dst_stride;
  }
}
|
|
|
|
void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
|
|
int dst_stride, int w, int h,
|
|
#if CONFIG_DUAL_FILTER
|
|
const INTERP_FILTER *interp_filter,
|
|
#else
|
|
const INTERP_FILTER interp_filter,
|
|
#endif
|
|
const int subpel_x_q4, int x_step_q4,
|
|
const int subpel_y_q4, int y_step_q4, int ref_idx,
|
|
int bd) {
|
|
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
|
|
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
|
|
int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
|
|
int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
|
|
|
|
assert(w <= MAX_BLOCK_WIDTH);
|
|
assert(h <= MAX_BLOCK_HEIGHT);
|
|
assert(y_step_q4 <= MAX_STEP);
|
|
assert(x_step_q4 <= MAX_STEP);
|
|
|
|
if (ignore_horiz && ignore_vert) {
|
|
highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
|
|
} else if (ignore_vert) {
|
|
#if CONFIG_DUAL_FILTER
|
|
InterpFilterParams filter_params =
|
|
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
|
|
#else
|
|
InterpFilterParams filter_params =
|
|
av1_get_interp_filter_params(interp_filter);
|
|
#endif
|
|
av1_highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h,
|
|
filter_params, subpel_x_q4, x_step_q4, ref_idx,
|
|
bd);
|
|
} else if (ignore_horiz) {
|
|
#if CONFIG_DUAL_FILTER
|
|
InterpFilterParams filter_params =
|
|
av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
|
|
#else
|
|
InterpFilterParams filter_params =
|
|
av1_get_interp_filter_params(interp_filter);
|
|
#endif
|
|
av1_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
|
|
filter_params, subpel_y_q4, y_step_q4, ref_idx,
|
|
bd);
|
|
} else {
|
|
// temp's size is set to (maximum possible intermediate_height) *
|
|
// MAX_BLOCK_WIDTH
|
|
uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
|
|
MAX_FILTER_TAP) *
|
|
MAX_BLOCK_WIDTH];
|
|
int temp_stride = MAX_BLOCK_WIDTH;
|
|
|
|
#if CONFIG_DUAL_FILTER
|
|
InterpFilterParams filter_params_x =
|
|
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
|
|
InterpFilterParams filter_params_y =
|
|
av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
|
|
InterpFilterParams filter_params = filter_params_x;
|
|
int filter_size = filter_params_y.taps;
|
|
#else
|
|
InterpFilterParams filter_params =
|
|
av1_get_interp_filter_params(interp_filter);
|
|
int filter_size = filter_params.taps;
|
|
#endif
|
|
|
|
int intermediate_height =
|
|
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
|
|
|
|
av1_highbd_convolve_horiz(
|
|
src - src_stride * (filter_size / 2 - 1), src_stride, temp, temp_stride,
|
|
w, intermediate_height, filter_params, subpel_x_q4, x_step_q4, 0, bd);
|
|
|
|
#if CONFIG_DUAL_FILTER
|
|
filter_params = filter_params_y;
|
|
#endif
|
|
filter_size = filter_params.taps;
|
|
assert(filter_params.taps <= MAX_FILTER_TAP);
|
|
|
|
av1_highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
|
|
temp_stride, dst, dst_stride, w, h, filter_params,
|
|
subpel_y_q4, y_step_q4, ref_idx, bd);
|
|
}
|
|
}
|
|
#endif // CONFIG_AOM_HIGHBITDEPTH
|