vpx/av1/common/clpf.c
Yaowu Xu d71be7815d Revert "Revert "Move CLPF block signals from frame to SB level.""
This reverts commit 9b25f3067485b32442e13964df098903736c3fd8 to
reinstate the reverted commit with fixes that solved the build issues
when --enalbe-clpf is used in configure.

Change-Id: I15447cae7fa9b3deb27976345dc3db230a4a7a60
2016-10-14 08:58:49 -07:00

285 lines
12 KiB
C

/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "av1/common/clpf.h"
#include "./aom_dsp_rtcd.h"
#include "aom/aom_image.h"
#include "aom_dsp/aom_dsp_common.h"
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b) {
int delta = 4 * clamp(A - X, -b, b) + clamp(B - X, -b, b) +
3 * clamp(C - X, -b, b) + 3 * clamp(D - X, -b, b) +
clamp(E - X, -b, b) + 4 * clamp(F - X, -b, b);
return (8 + delta - (delta < 0)) >> 4;
}
void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride,
int dstride, int x0, int y0, int sizex, int sizey,
int width, int height, unsigned int strength) {
int x, y;
for (y = y0; y < y0 + sizey; y++) {
for (x = x0; x < x0 + sizex; x++) {
int X = src[y * sstride + x];
int A = src[AOMMAX(0, y - 1) * sstride + x];
int B = src[y * sstride + AOMMAX(0, x - 2)];
int C = src[y * sstride + AOMMAX(0, x - 1)];
int D = src[y * sstride + AOMMIN(width - 1, x + 1)];
int E = src[y * sstride + AOMMIN(width - 1, x + 2)];
int F = src[AOMMIN(height - 1, y + 1) * sstride + x];
int delta;
delta = av1_clpf_sample(X, A, B, C, D, E, F, strength);
dst[y * dstride + x] = X + delta;
}
}
}
#if CONFIG_AOM_HIGHBITDEPTH
// Identical to aom_clpf_block_c() apart from "src" and "dst".
void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
int dstride, int x0, int y0, int sizex, int sizey,
int width, int height, unsigned int strength) {
int x, y;
for (y = y0; y < y0 + sizey; y++) {
for (x = x0; x < x0 + sizex; x++) {
int X = src[y * sstride + x];
int A = src[AOMMAX(0, y - 1) * sstride + x];
int B = src[y * sstride + AOMMAX(0, x - 2)];
int C = src[y * sstride + AOMMAX(0, x - 1)];
int D = src[y * sstride + AOMMIN(width - 1, x + 1)];
int E = src[y * sstride + AOMMIN(width - 1, x + 2)];
int F = src[AOMMIN(height - 1, y + 1) * sstride + x];
int delta;
delta = av1_clpf_sample(X, A, B, C, D, E, F, strength);
dst[y * dstride + x] = X + delta;
}
}
}
#endif
// Return number of filtered blocks
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, int plane,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, int8_t *)) {
/* Constrained low-pass filter (CLPF) */
int c, k, l, m, n;
const int subx = plane != AOM_PLANE_Y && frame->subsampling_x;
const int suby = plane != AOM_PLANE_Y && frame->subsampling_y;
const int bs = (subx || suby) ? 4 : 8;
const int bslog = get_msb(bs);
int width = plane != AOM_PLANE_Y ? frame->uv_crop_width : frame->y_crop_width;
int height =
plane != AOM_PLANE_Y ? frame->uv_crop_height : frame->y_crop_height;
int xpos, ypos;
const int sstride = plane != AOM_PLANE_Y ? frame->uv_stride : frame->y_stride;
int dstride = bs;
const int num_fb_hor = (width + (1 << fb_size_log2) - 1) >> fb_size_log2;
const int num_fb_ver = (height + (1 << fb_size_log2) - 1) >> fb_size_log2;
uint8_t *cache = NULL;
uint8_t **cache_ptr = NULL;
uint8_t **cache_dst = NULL;
int cache_idx = 0;
const int cache_size = num_fb_hor << (2 * fb_size_log2);
const int cache_blocks = cache_size / (bs * bs);
uint8_t *src_buffer =
plane != AOM_PLANE_Y
? (plane == AOM_PLANE_U ? frame->u_buffer : frame->v_buffer)
: frame->y_buffer;
uint8_t *dst_buffer;
// Make buffer space for in-place filtering
#if CONFIG_AOM_HIGHBITDEPTH
strength <<= (cm->bit_depth - 8);
CHECK_MEM_ERROR(cm, cache, aom_malloc(cache_size << !!cm->use_highbitdepth));
dst_buffer = cm->use_highbitdepth ? CONVERT_TO_BYTEPTR(cache) : cache;
#else
CHECK_MEM_ERROR(cm, cache, aom_malloc(cache_size));
dst_buffer = cache;
#endif
CHECK_MEM_ERROR(cm, cache_ptr, aom_malloc(cache_blocks * sizeof(*cache_ptr)));
CHECK_MEM_ERROR(cm, cache_dst, aom_malloc(cache_blocks * sizeof(*cache_dst)));
memset(cache_ptr, 0, cache_blocks * sizeof(*cache_dst));
// Iterate over all filter blocks
for (k = 0; k < num_fb_ver; k++) {
for (l = 0; l < num_fb_hor; l++) {
int h, w;
int allskip = !(enable_fb_flag && fb_size_log2 == MAX_FB_SIZE_LOG2);
const int xoff = l << fb_size_log2;
const int yoff = k << fb_size_log2;
for (m = 0; allskip && m < (1 << fb_size_log2) / bs; m++) {
for (n = 0; allskip && n < (1 << fb_size_log2) / bs; n++) {
xpos = xoff + n * bs;
ypos = yoff + m * bs;
if (xpos < width && ypos < height) {
allskip &=
cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
->mbmi.skip;
}
}
}
// Calculate the actual filter block size near frame edges
h = AOMMIN(height, (k + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
w = AOMMIN(width, (l + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
h += !h << fb_size_log2;
w += !w << fb_size_log2;
if (!allskip && // Do not filter the block if all is skip encoded
(!enable_fb_flag ||
// Only called if fb_flag enabled (luma only)
decision(k, l, frame, org, cm, bs, w / bs, h / bs, strength,
fb_size_log2,
cm->clpf_blocks + yoff / MIN_FB_SIZE * cm->clpf_stride +
xoff / MIN_FB_SIZE))) {
// Iterate over all smaller blocks inside the filter block
for (m = 0; m < ((h + bs - 1) >> bslog); m++) {
for (n = 0; n < ((w + bs - 1) >> bslog); n++) {
int sizex, sizey;
xpos = xoff + n * bs;
ypos = yoff + m * bs;
sizex = AOMMIN(width - xpos, bs);
sizey = AOMMIN(height - ypos, bs);
if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
->mbmi.skip ||
(enable_fb_flag && fb_size_log2 == MAX_FB_SIZE_LOG2)) {
// Temporary buffering needed for in-place filtering
if (cache_ptr[cache_idx]) {
// Copy filtered block back into the frame
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
uint16_t *const d = CONVERT_TO_SHORTPTR(cache_dst[cache_idx]);
if (sizex == 8) {
for (c = 0; c < sizey; c++) {
*(uint64_t *)(d + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2);
*(uint64_t *)(d + c * sstride + 4) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2 + 8);
}
} else if (sizex == 4) {
for (c = 0; c < sizey; c++)
*(uint64_t *)(d + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2);
} else {
for (c = 0; c < sizey; c++)
memcpy(d + c * sstride, cache_ptr[cache_idx] + c * bs * 2,
sizex);
}
} else {
if (sizex == 8)
for (c = 0; c < sizey; c++)
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
else if (sizex == 4)
for (c = 0; c < sizey; c++)
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
else
for (c = 0; c < sizey; c++)
memcpy(cache_dst[cache_idx] + c * sstride,
cache_ptr[cache_idx] + c * bs, sizex);
}
#else
if (sizex == 8)
for (c = 0; c < sizey; c++)
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
else if (sizex == 4)
for (c = 0; c < sizey; c++)
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
else
for (c = 0; c < sizey; c++)
memcpy(cache_dst[cache_idx] + c * sstride,
cache_ptr[cache_idx] + c * bs, sizex);
#endif
}
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
cache_ptr[cache_idx] = cache + cache_idx * bs * bs * 2;
dst_buffer =
CONVERT_TO_BYTEPTR(cache_ptr[cache_idx]) - ypos * bs - xpos;
} else {
cache_ptr[cache_idx] = cache + cache_idx * bs * bs;
dst_buffer = cache_ptr[cache_idx] - ypos * bs - xpos;
}
#else
cache_ptr[cache_idx] = cache + cache_idx * bs * bs;
dst_buffer = cache_ptr[cache_idx] - ypos * bs - xpos;
#endif
cache_dst[cache_idx] = src_buffer + ypos * sstride + xpos;
if (++cache_idx >= cache_blocks) cache_idx = 0;
// Apply the filter
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
aom_clpf_block_hbd(CONVERT_TO_SHORTPTR(src_buffer),
CONVERT_TO_SHORTPTR(dst_buffer), sstride,
dstride, xpos, ypos, sizex, sizey, width,
height, strength);
} else {
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, sizex, sizey, width, height, strength);
}
#else
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, sizex, sizey, width, height, strength);
#endif
}
}
}
}
}
}
// Copy remaining blocks into the frame
for (cache_idx = 0; cache_idx < cache_blocks && cache_ptr[cache_idx];
cache_idx++) {
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
uint16_t *const d = CONVERT_TO_SHORTPTR(cache_dst[cache_idx]);
for (c = 0; c < bs; c++) {
*(uint64_t *)(d + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2);
if (bs == 8)
*(uint64_t *)(d + c * sstride + 4) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs * 2 + 8);
}
} else {
for (c = 0; c < bs; c++)
if (bs == 4)
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
else
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
}
#else
for (c = 0; c < bs; c++)
if (bs == 4)
*(uint32_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint32_t *)(cache_ptr[cache_idx] + c * bs);
else
*(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
*(uint64_t *)(cache_ptr[cache_idx] + c * bs);
#endif
}
aom_free(cache);
aom_free(cache_ptr);
aom_free(cache_dst);
}