Loopfilter bitmask buildup
Change-Id: Icf7902e6f34380ea8f74662260c134e45e14f407
This commit is contained in:
parent
34f94985b7
commit
5acccbf9b9
368
loopfilter/alloccommon.c
Normal file
368
loopfilter/alloccommon.c
Normal file
@ -0,0 +1,368 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "aom_mem/aom_mem.h"
|
||||
|
||||
#include "av1/common/alloccommon.h"
|
||||
#include "av1/common/blockd.h"
|
||||
#include "av1/common/entropymode.h"
|
||||
#include "av1/common/entropymv.h"
|
||||
#include "av1/common/onyxc_int.h"
|
||||
|
||||
int av1_get_MBs(int width, int height) {
|
||||
const int aligned_width = ALIGN_POWER_OF_TWO(width, 3);
|
||||
const int aligned_height = ALIGN_POWER_OF_TWO(height, 3);
|
||||
const int mi_cols = aligned_width >> MI_SIZE_LOG2;
|
||||
const int mi_rows = aligned_height >> MI_SIZE_LOG2;
|
||||
|
||||
const int mb_cols = (mi_cols + 2) >> 2;
|
||||
const int mb_rows = (mi_rows + 2) >> 2;
|
||||
return mb_rows * mb_cols;
|
||||
}
|
||||
|
||||
// Derives the mode-info and macroblock dimensions of `cm` from a frame size
// in pixels and stores them in cm->mi_cols, mi_rows, mi_stride, mb_cols,
// mb_rows and MBs.
void av1_set_mb_mi(AV1_COMMON *cm, int width, int height) {
  // The decoded width and height are padded up to multiples of 8 luma pixels
  // (which may only be a multiple of 4 chroma pixels when subsampling is
  // used). This simplifies experiments such as cdef that operate on units of
  // 8x8 luma pixels.
  const int padded_w = ALIGN_POWER_OF_TWO(width, 3);
  const int padded_h = ALIGN_POWER_OF_TWO(height, 3);

  cm->mi_cols = padded_w >> MI_SIZE_LOG2;
  cm->mi_rows = padded_h >> MI_SIZE_LOG2;
  cm->mi_stride = calc_mi_size(cm->mi_cols);

  cm->mb_cols = (cm->mi_cols + 2) >> 2;
  cm->mb_rows = (cm->mi_rows + 2) >> 2;
  cm->MBs = cm->mb_rows * cm->mb_cols;
}
|
||||
|
||||
#if !CONFIG_SEGMENT_PRED_LAST
|
||||
// Allocates NUM_PING_PONG_BUFFERS zero-filled segmentation maps of
// rows * cols bytes each and points the current/last map pointers at them.
// Returns 0 on success and 1 as soon as one allocation fails; buffers that
// were already allocated stay in cm->seg_map_array for the caller to release.
static int alloc_seg_map(AV1_COMMON *cm, int rows, int cols) {
  const int map_bytes = rows * cols;

  for (int buf = 0; buf < NUM_PING_PONG_BUFFERS; ++buf) {
    uint8_t *const map = (uint8_t *)aom_calloc(map_bytes, 1);
    cm->seg_map_array[buf] = map;
    if (map == NULL) return 1;
  }
  cm->seg_map_alloc_size = map_bytes;

  // Buffer 0 starts out as the current map, buffer 1 as the previous one.
  cm->seg_map_idx = 0;
  cm->prev_seg_map_idx = 1;

  cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
  if (!cm->frame_parallel_decode) {
    cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
  }

  return 0;
}
|
||||
|
||||
// Releases every ping-pong segmentation map and clears the pointers and the
// recorded allocation size. last_frame_seg_map is only reset when
// frame_parallel_decode is off, matching alloc_seg_map().
static void free_seg_map(AV1_COMMON *cm) {
  for (int buf = 0; buf < NUM_PING_PONG_BUFFERS; ++buf) {
    aom_free(cm->seg_map_array[buf]);
    cm->seg_map_array[buf] = NULL;
  }

  cm->current_frame_seg_map = NULL;
  if (!cm->frame_parallel_decode) cm->last_frame_seg_map = NULL;

  cm->seg_map_alloc_size = 0;
}
|
||||
#endif
|
||||
|
||||
// Walks all FRAME_BUFFERS entries of `pool`: hands still-referenced raw frame
// buffers back through pool->release_fb_cb, then frees the per-buffer motion
// vectors, the segmentation map (CONFIG_SEGMENT_PRED_LAST only) and the frame
// buffer itself.
void av1_free_ref_frame_buffers(BufferPool *pool) {
  for (int idx = 0; idx < FRAME_BUFFERS; ++idx) {
    const int still_referenced = pool->frame_bufs[idx].ref_count > 0;
    if (still_referenced &&
        pool->frame_bufs[idx].raw_frame_buffer.data != NULL) {
      pool->release_fb_cb(pool->cb_priv,
                          &pool->frame_bufs[idx].raw_frame_buffer);
      pool->frame_bufs[idx].ref_count = 0;
    }

    aom_free(pool->frame_bufs[idx].mvs);
    pool->frame_bufs[idx].mvs = NULL;
#if CONFIG_SEGMENT_PRED_LAST
    aom_free(pool->frame_bufs[idx].seg_map);
    pool->frame_bufs[idx].seg_map = NULL;
#endif
    aom_free_frame_buffer(&pool->frame_bufs[idx].buf);
  }
}
|
||||
|
||||
#if CONFIG_LOOP_RESTORATION
|
||||
// Assumes cm->rst_info[p].restoration_unit_size is already initialized
|
||||
void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
|
||||
const int num_planes = av1_num_planes(cm);
|
||||
for (int p = 0; p < num_planes; ++p)
|
||||
av1_alloc_restoration_struct(cm, &cm->rst_info[p], p > 0);
|
||||
aom_free(cm->rst_tmpbuf);
|
||||
CHECK_MEM_ERROR(cm, cm->rst_tmpbuf,
|
||||
(int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE));
|
||||
|
||||
#if CONFIG_STRIPED_LOOP_RESTORATION
|
||||
// For striped loop restoration, we divide each row of tiles into "stripes",
|
||||
// of height 64 luma pixels but with an offset by RESTORATION_TILE_OFFSET
|
||||
// luma pixels to match the output from CDEF. We will need to store 2 *
|
||||
// RESTORATION_CTX_VERT lines of data for each stripe, and also need to be
|
||||
// able to quickly answer the question "Where is the <n>'th stripe for tile
|
||||
// row <m>?" To make that efficient, we generate the rst_last_stripe array.
|
||||
int num_stripes = 0;
|
||||
for (int i = 0; i < cm->tile_rows; ++i) {
|
||||
#if CONFIG_MAX_TILE
|
||||
TileInfo tile_info;
|
||||
av1_tile_set_row(&tile_info, cm, i);
|
||||
const int mi_h = tile_info.mi_row_end - tile_info.mi_row_start;
|
||||
#else
|
||||
const int mi_h = ((i + 1) < cm->tile_rows)
|
||||
? cm->tile_height
|
||||
: (cm->mi_rows - i * cm->tile_height);
|
||||
#endif
|
||||
const int ext_h = RESTORATION_TILE_OFFSET + (mi_h << MI_SIZE_LOG2);
|
||||
const int tile_stripes = (ext_h + 63) / 64;
|
||||
num_stripes += tile_stripes;
|
||||
cm->rst_end_stripe[i] = num_stripes;
|
||||
}
|
||||
|
||||
// Now we need to allocate enough space to store the line buffers for the
|
||||
// stripes
|
||||
#if CONFIG_HORZONLY_FRAME_SUPERRES
|
||||
const int frame_w = cm->superres_upscaled_width;
|
||||
#else
|
||||
const int frame_w = cm->width;
|
||||
#endif // CONFIG_HORZONLY_FRAME_SUPERRES
|
||||
const int use_highbd = cm->use_highbitdepth ? 1 : 0;
|
||||
|
||||
for (int p = 0; p < num_planes; ++p) {
|
||||
const int is_uv = p > 0;
|
||||
const int ss_x = is_uv && cm->subsampling_x;
|
||||
const int plane_w = ((frame_w + ss_x) >> ss_x) + 2 * RESTORATION_EXTRA_HORZ;
|
||||
const int stride = ALIGN_POWER_OF_TWO(plane_w, 5);
|
||||
const int buf_size = num_stripes * stride * RESTORATION_CTX_VERT
|
||||
<< use_highbd;
|
||||
RestorationStripeBoundaries *boundaries = &cm->rst_info[p].boundaries;
|
||||
aom_free(boundaries->stripe_boundary_above);
|
||||
aom_free(boundaries->stripe_boundary_below);
|
||||
|
||||
CHECK_MEM_ERROR(cm, boundaries->stripe_boundary_above,
|
||||
(uint8_t *)aom_memalign(32, buf_size));
|
||||
CHECK_MEM_ERROR(cm, boundaries->stripe_boundary_below,
|
||||
(uint8_t *)aom_memalign(32, buf_size));
|
||||
|
||||
boundaries->stripe_boundary_stride = stride;
|
||||
}
|
||||
#endif // CONFIG_STRIPED_LOOP_RESTORATION
|
||||
}
|
||||
|
||||
// Frees everything av1_alloc_restoration_buffers() set up: the per-plane
// restoration structs, cm->rst_tmpbuf and (striped loop restoration only) the
// stripe boundary buffers. Freed pointers are reset to NULL.
void av1_free_restoration_buffers(AV1_COMMON *cm) {
  const int num_planes = av1_num_planes(cm);

  for (int plane = 0; plane < num_planes; ++plane) {
    av1_free_restoration_struct(&cm->rst_info[plane]);
  }

  aom_free(cm->rst_tmpbuf);
  cm->rst_tmpbuf = NULL;

#if CONFIG_STRIPED_LOOP_RESTORATION
  for (int plane = 0; plane < num_planes; ++plane) {
    RestorationStripeBoundaries *const edges = &cm->rst_info[plane].boundaries;
    aom_free(edges->stripe_boundary_above);
    aom_free(edges->stripe_boundary_below);
    edges->stripe_boundary_above = NULL;
    edges->stripe_boundary_below = NULL;
  }
#endif
}
|
||||
#endif // CONFIG_LOOP_RESTORATION
|
||||
|
||||
// Releases every buffer that av1_alloc_context_buffers() manages and resets
// the matching pointers and size bookkeeping, so a later allocation call
// starts from a clean state.
void av1_free_context_buffers(AV1_COMMON *cm) {
  const int num_planes = av1_num_planes(cm);

  // Mode-info storage is released through the callback installed on cm.
  cm->free_mi(cm);

  aom_free(cm->boundary_info);
  cm->boundary_info_alloc_size = 0;
  cm->boundary_info = NULL;

#if !CONFIG_SEGMENT_PRED_LAST
  free_seg_map(cm);
#endif

  // Above-row contexts.
  for (int plane = 0; plane < num_planes; ++plane) {
    aom_free(cm->above_context[plane]);
    cm->above_context[plane] = NULL;
  }
  aom_free(cm->above_seg_context);
  cm->above_seg_context = NULL;
  cm->above_context_alloc_cols = 0;
  aom_free(cm->above_txfm_context);
  cm->above_txfm_context = NULL;

  for (int plane = 0; plane < num_planes; ++plane) {
    aom_free(cm->top_txfm_context[plane]);
    cm->top_txfm_context[plane] = NULL;
  }

  // Loop-filter bit masks.
  aom_free(cm->lf.lfm);
  cm->lf.lfm = NULL;
  cm->lf.lfm_num = 0;
  cm->lf.lfm_stride = 0;
  cm->lf.curr_frame_offset = 0;

  // Loop-filter neighbor information.
  aom_free(cm->lf.neighbor);
  cm->lf.neighbor = NULL;
  cm->lf.neighbor_width = 0;
  cm->lf.neighbor_height = 0;
}
|
||||
|
||||
// (Re)allocates the loop-filter bit-mask array cm->lf.lfm and the neighbor
// information buffer cm->lf.neighbor for the current cm->mi_cols/mi_rows.
// Both buffers are zero-filled. Returns 0 on success, 1 if either allocation
// fails (the size fields are left as computed; the caller is expected to
// clean up).
static int alloc_loop_filter(AV1_COMMON *cm) {
  const int round_up = MAX_MIB_SIZE - 1;

  aom_free(cm->lf.lfm);
  // Each lfm holds bit masks for all the 4x4 blocks in a max 64x64 (128x128
  // for ext_partitions) region. The stride and rows are rounded up /
  // truncated to a multiple of 16 (32 for ext_partition).
  cm->lf.lfm_stride = (cm->mi_cols + round_up) >> MAX_MIB_SIZE_LOG2;
  cm->lf.lfm_num =
      ((cm->mi_rows + round_up) >> MAX_MIB_SIZE_LOG2) * cm->lf.lfm_stride;
  // NOTE(review): 0xbeef looks like a placeholder/sentinel value; its meaning
  // is not visible in this file.
  cm->lf.curr_frame_offset = 0xbeef;
  cm->lf.lfm = (LpfMask *)aom_calloc(cm->lf.lfm_num, sizeof(*cm->lf.lfm));
  if (cm->lf.lfm == NULL) return 1;

  // Neighbor information.
  aom_free(cm->lf.neighbor);
  cm->lf.neighbor_width = cm->mi_cols + round_up;
  cm->lf.neighbor_height = cm->mi_rows + round_up;
  // Six kinds of neighbor info in total; each kind has a "top" zone of
  // neighbor_width entries and a "left" zone of neighbor_height entries.
  // ------------------------------------------------------------
  //                    top zone         left zone
  //                 neighbor_width   neighbor_height
  // Y  tx_size     |--------------|---------------|
  // UV tx_size     |--------------|---------------|
  // Y  level       |--------------|---------------|
  // U  level       |--------------|---------------|
  // V  level       |--------------|---------------|
  // skip           |--------------|---------------|
  // ------------------------------------------------------------
  cm->lf.neighbor = (uint8_t *)aom_calloc(
      6 * (cm->lf.neighbor_width + cm->lf.neighbor_height), sizeof(uint8_t));
  if (cm->lf.neighbor == NULL) return 1;

  return 0;
}
|
||||
|
||||
// Sizes cm for a width x height frame and makes sure every context buffer is
// large enough for it: mode info (via the alloc_mi/free_mi callbacks),
// boundary info, segmentation maps (unless CONFIG_SEGMENT_PRED_LAST), the
// above/top contexts and the loop-filter buffers. Buffers that are already
// big enough are kept, except the loop-filter buffers which are always
// reallocated. Returns 0 on success; on any failure all context buffers are
// freed, the mi_* dimensions are reset to those of a 0x0 frame and 1 is
// returned.
int av1_alloc_context_buffers(AV1_COMMON *cm, int width, int height) {
  const int num_planes = av1_num_planes(cm);

  av1_set_mb_mi(cm, width, height);

  // Mode-info storage.
  const int mi_needed = cm->mi_stride * calc_mi_size(cm->mi_rows);
  if (cm->mi_alloc_size < mi_needed) {
    cm->free_mi(cm);
    if (cm->alloc_mi(cm, mi_needed)) goto fail;
  }

  // Boundary info.
  const int boundary_needed = cm->mi_rows * cm->mi_stride;
  if (cm->boundary_info_alloc_size < boundary_needed) {
    aom_free(cm->boundary_info);
    cm->boundary_info_alloc_size = 0;
    cm->boundary_info =
        (BOUNDARY_TYPE *)aom_calloc(boundary_needed, sizeof(BOUNDARY_TYPE));
    if (!cm->boundary_info) goto fail;
    cm->boundary_info_alloc_size = boundary_needed;
  }

#if !CONFIG_SEGMENT_PRED_LAST
  if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) {
    // Recreate the segmentation map structure, zero-filled.
    free_seg_map(cm);
    if (alloc_seg_map(cm, cm->mi_rows, cm->mi_cols)) goto fail;
  }
#endif

  if (cm->above_context_alloc_cols < cm->mi_cols) {
    // TODO(geza.lore): These are bigger than they need to be.
    // cm->tile_width would be enough but it complicates indexing a
    // little elsewhere.
    const int padded_mi_cols =
        ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);

    for (int plane = 0; plane < num_planes; ++plane) {
      aom_free(cm->above_context[plane]);
      cm->above_context[plane] = (ENTROPY_CONTEXT *)aom_calloc(
          padded_mi_cols << (MI_SIZE_LOG2 - tx_size_wide_log2[0]),
          sizeof(*cm->above_context[0]));
      if (!cm->above_context[plane]) goto fail;
    }

    aom_free(cm->above_seg_context);
    cm->above_seg_context = (PARTITION_CONTEXT *)aom_calloc(
        padded_mi_cols, sizeof(*cm->above_seg_context));
    if (!cm->above_seg_context) goto fail;

    aom_free(cm->above_txfm_context);
    cm->above_txfm_context = (TXFM_CONTEXT *)aom_calloc(
        padded_mi_cols << TX_UNIT_WIDE_LOG2, sizeof(*cm->above_txfm_context));
    if (!cm->above_txfm_context) goto fail;

    for (int plane = 0; plane < num_planes; ++plane) {
      aom_free(cm->top_txfm_context[plane]);
      cm->top_txfm_context[plane] =
          (TXFM_CONTEXT *)aom_calloc(padded_mi_cols << TX_UNIT_WIDE_LOG2,
                                     sizeof(*cm->top_txfm_context[0]));
      if (!cm->top_txfm_context[plane]) goto fail;
    }

    cm->above_context_alloc_cols = padded_mi_cols;
  }

  if (alloc_loop_filter(cm)) goto fail;

  return 0;

fail:
  // Clear the mi_* values to force a realloc on resync.
  av1_set_mb_mi(cm, 0, 0);
  av1_free_context_buffers(cm);
  return 1;
}
|
||||
|
||||
// Tears down `cm`: frees all context buffers, then the frame context cm->fc
// and the cm->frame_contexts array, leaving both pointers NULL.
void av1_remove_common(AV1_COMMON *cm) {
  av1_free_context_buffers(cm);

  aom_free(cm->fc);
  aom_free(cm->frame_contexts);
  cm->fc = NULL;
  cm->frame_contexts = NULL;
}
|
||||
|
||||
// Runs the setup_mi callback on `cm` and, unless CONFIG_SEGMENT_PRED_LAST is
// set, zeroes the previous frame's segmentation map (mi_rows * mi_cols bytes)
// when that map exists and frame-parallel decoding is off.
void av1_init_context_buffers(AV1_COMMON *cm) {
  cm->setup_mi(cm);
#if !CONFIG_SEGMENT_PRED_LAST
  if (cm->last_frame_seg_map && !cm->frame_parallel_decode) {
    memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
  }
#endif
}
|
||||
#if !CONFIG_SEGMENT_PRED_LAST
|
||||
// Exchanges the roles of the two ping-pong segmentation maps: the indices are
// swapped and the current/last map pointers are refreshed from them.
void av1_swap_current_and_last_seg_map(AV1_COMMON *cm) {
  const int was_current = cm->seg_map_idx;
  const int was_previous = cm->prev_seg_map_idx;

  cm->seg_map_idx = was_previous;
  cm->prev_seg_map_idx = was_current;

  cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
  cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
}
|
||||
#endif
|
4244
loopfilter/av1_loopfilter.c
Normal file
4244
loopfilter/av1_loopfilter.c
Normal file
File diff suppressed because it is too large
Load Diff
213
loopfilter/av1_loopfilter.h
Normal file
213
loopfilter/av1_loopfilter.h
Normal file
@ -0,0 +1,213 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AV1_COMMON_LOOPFILTER_H_
|
||||
#define AV1_COMMON_LOOPFILTER_H_
|
||||
|
||||
#include "aom_ports/mem.h"
|
||||
#include "./aom_config.h"
|
||||
|
||||
#include "av1/common/blockd.h"
|
||||
#include "av1/common/seg_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MAX_LOOP_FILTER 63
|
||||
#define MAX_SHARPNESS 7
|
||||
|
||||
#define SIMD_WIDTH 16
|
||||
|
||||
#define MAX_MODE_LF_DELTAS 2
|
||||
|
||||
// Selects which plane-filtering routine is used for the chroma planes.
enum lf_path {
  LF_PATH_420,   // chroma subsampled in both directions
  LF_PATH_444,   // no chroma subsampling (also used for luma-only filtering)
  LF_PATH_SLOW,  // any other subsampling layout
};
|
||||
|
||||
// Luma edge mask: four 64-bit words (256 bits in total).
typedef struct {
  uint64_t bits[4];
} FilterMaskY;

// Chroma edge mask: a single 64-bit word.
typedef uint64_t FilterMaskUV;
|
||||
|
||||
// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
|
||||
// Each 1 bit represents a position in which we want to apply the loop filter.
|
||||
// Left_ entries refer to whether we apply a filter on the border to the
|
||||
// left of the block. Above_ entries refer to whether or not to apply a
|
||||
// filter on the above border. Int_ entries refer to whether or not to
|
||||
// apply borders on the 4x4 edges within the 8x8 block that each bit
|
||||
// represents.
|
||||
// Since each transform is accompanied by a potentially different type of
|
||||
// loop filter there is a different entry in the array for each transform size.
|
||||
typedef struct {
|
||||
FilterMaskY left_y[TX_SIZES];
|
||||
FilterMaskY above_y[TX_SIZES];
|
||||
FilterMaskUV left_u[TX_SIZES];
|
||||
FilterMaskUV above_u[TX_SIZES];
|
||||
FilterMaskUV left_v[TX_SIZES];
|
||||
FilterMaskUV above_v[TX_SIZES];
|
||||
|
||||
// Y plane vertical edge and horizontal edge filter level
|
||||
uint8_t lfl_y_hor[MAX_MIB_SIZE / 2][MAX_MIB_SIZE / 2];
|
||||
uint8_t lfl_y_ver[MAX_MIB_SIZE / 2][MAX_MIB_SIZE / 2];
|
||||
|
||||
// UV plane vertical edge and horizontal edge shares the same level
|
||||
uint8_t lfl_u[MAX_MIB_SIZE / 4][MAX_MIB_SIZE / 4];
|
||||
uint8_t lfl_v[MAX_MIB_SIZE / 4][MAX_MIB_SIZE / 4];
|
||||
} LoopFilterMask;
|
||||
|
||||
// Loopfilter bit mask per super block
|
||||
#define LOOP_FILTER_MASK_NUM 4
|
||||
typedef struct {
|
||||
LoopFilterMask lfm[LOOP_FILTER_MASK_NUM];
|
||||
int is_setup;
|
||||
} LpfMask;
|
||||
|
||||
struct loopfilter {
|
||||
LpfMask *lfm;
|
||||
// Neighbor block information for loopfilter bit mask setup
|
||||
uint8_t *neighbor;
|
||||
size_t lfm_num;
|
||||
int lfm_stride;
|
||||
unsigned int curr_frame_offset;
|
||||
unsigned int neighbor_width;
|
||||
unsigned int neighbor_height;
|
||||
#if CONFIG_LOOPFILTER_LEVEL
|
||||
int filter_level[2];
|
||||
int filter_level_u;
|
||||
int filter_level_v;
|
||||
#else
|
||||
int filter_level;
|
||||
#endif
|
||||
|
||||
int sharpness_level;
|
||||
int last_sharpness_level;
|
||||
|
||||
uint8_t mode_ref_delta_enabled;
|
||||
uint8_t mode_ref_delta_update;
|
||||
|
||||
// 0 = Intra, Last, Last2+Last3,
|
||||
// GF, BRF, ARF2, ARF
|
||||
int8_t ref_deltas[TOTAL_REFS_PER_FRAME];
|
||||
int8_t last_ref_deltas[TOTAL_REFS_PER_FRAME];
|
||||
|
||||
// 0 = ZERO_MV, MV
|
||||
int8_t mode_deltas[MAX_MODE_LF_DELTAS];
|
||||
int8_t last_mode_deltas[MAX_MODE_LF_DELTAS];
|
||||
};
|
||||
|
||||
// Need to align this structure so when it is declared and
|
||||
// passed it can be loaded into vector registers.
|
||||
typedef struct {
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]);
|
||||
} loop_filter_thresh;
|
||||
|
||||
typedef struct {
|
||||
loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
|
||||
#if CONFIG_LOOPFILTER_LEVEL
|
||||
uint8_t lvl[MAX_SEGMENTS][2][TOTAL_REFS_PER_FRAME][MAX_MODE_LF_DELTAS];
|
||||
#else
|
||||
uint8_t lvl[MAX_SEGMENTS][TOTAL_REFS_PER_FRAME][MAX_MODE_LF_DELTAS];
|
||||
#endif
|
||||
} loop_filter_info_n;
|
||||
|
||||
/* assorted loopfilter functions which get used elsewhere */
|
||||
struct AV1Common;
|
||||
struct macroblockd;
|
||||
struct AV1LfSyncData;
|
||||
|
||||
// This function sets up the bit masks for the entire 64x64 region represented
|
||||
// by mi_row, mi_col.
|
||||
void av1_setup_mask(struct AV1Common *const cm, const int mi_row,
|
||||
const int mi_col, MODE_INFO **mi_4x4,
|
||||
const int mode_info_stride, LpfMask *lfm);
|
||||
|
||||
void av1_filter_block_plane_ss00_ver(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LoopFilterMask *lfm);
|
||||
void av1_filter_block_plane_ss00_hor(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LoopFilterMask *lfm);
|
||||
void av1_filter_block_plane_ss11_u_ver(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LoopFilterMask *lfm);
|
||||
void av1_filter_block_plane_ss11_u_hor(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LoopFilterMask *lfm);
|
||||
|
||||
void av1_filter_block_plane_ss11_v_ver(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LoopFilterMask *lfm);
|
||||
void av1_filter_block_plane_ss11_v_hor(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LoopFilterMask *lfm);
|
||||
|
||||
void av1_filter_block_plane_non420_ver(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *plane,
|
||||
MODE_INFO **mi_8x8, int mi_row,
|
||||
int mi_col, int pl);
|
||||
void av1_filter_block_plane_non420_hor(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *plane,
|
||||
MODE_INFO **mi_8x8, int mi_row,
|
||||
int mi_col, int pl);
|
||||
|
||||
void av1_loop_filter_init(struct AV1Common *cm);
|
||||
|
||||
// Update the loop filter for the current frame.
|
||||
// This should be called before av1_loop_filter_rows(),
|
||||
// av1_loop_filter_frame()
|
||||
// calls this function directly.
|
||||
void av1_loop_filter_frame_init(struct AV1Common *cm, int default_filt_lvl,
|
||||
int default_filt_lvl_r
|
||||
#if CONFIG_LOOPFILTER_LEVEL
|
||||
,
|
||||
int plane
|
||||
#endif
|
||||
);
|
||||
|
||||
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
|
||||
struct macroblockd *mbd, int filter_level,
|
||||
#if CONFIG_LOOPFILTER_LEVEL
|
||||
int filter_level_r,
|
||||
#endif
|
||||
int y_only, int partial_frame);
|
||||
|
||||
// Apply the loop filter to [start, stop) macro block rows in frame_buffer.
|
||||
void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
|
||||
struct AV1Common *cm,
|
||||
struct macroblockd_plane *planes, int start, int stop,
|
||||
int y_only);
|
||||
|
||||
typedef struct LoopFilterWorkerData {
|
||||
YV12_BUFFER_CONFIG *frame_buffer;
|
||||
struct AV1Common *cm;
|
||||
struct macroblockd_plane planes[MAX_MB_PLANE];
|
||||
|
||||
int start;
|
||||
int stop;
|
||||
int y_only;
|
||||
} LFWorkerData;
|
||||
|
||||
void av1_loop_filter_data_reset(LFWorkerData *lf_data,
|
||||
YV12_BUFFER_CONFIG *frame_buffer,
|
||||
struct AV1Common *cm,
|
||||
const struct macroblockd_plane *planes);
|
||||
|
||||
// Operates on the rows described by 'lf_data'.
|
||||
int av1_loop_filter_worker(LFWorkerData *const lf_data, void *unused);
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // AV1_COMMON_LOOPFILTER_H_
|
150
loopfilter/shift.c
Normal file
150
loopfilter/shift.c
Normal file
@ -0,0 +1,150 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
//----------------------------------------------------------------------------
// Note:
// A 64x64 block is constructed from 256 (16x16) 4x4 sub-blocks.
// Every 4 rows of sub-blocks are represented by one uint64_t mask, so four
// uint64_t words, bitmask[4], cover the whole 64x64 block.
//
// Given a location (idx, idy), this function stores in *y_index which of
// bitmask[0..3] to use and returns the bit position (shift) inside that
// word: 16 bits per sub-block row, one bit per sub-block column.
//
int get_y_index_shift(int idx, int idy, int *y_index) {
  const int sub_row = idy >> 2;  // row in 4x4 units
  const int sub_col = idx >> 2;  // column in 4x4 units

  *y_index = idy >> 4;
  return ((sub_row % 4) << 4) + sub_col;
}
|
||||
|
||||
// Note:
// For 4:2:0 sampling, a 32x32 chroma block is constructed from 64 (8x8) 4x4
// sub-blocks, so one uint64_t bitmask is enough to hold all edge information.
// Both coordinates are divided by 8 and combined row-major with 8 bits per
// row. Returns the resulting bit position (uv_shift).
//
int get_uv_index_shift(int idx, int idy) {
  const int row = idy >> 3;
  const int col = idx >> 3;
  return (row << 3) + col;
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// AV1 has 4x4 coding blocks; block edge information is described by a bit
// mask made of four uint64_t words.
//
// Prints two tables for a size x size block walked in steps of 4: first the
// [x,y] coordinate of every 4x4 sub-block, then the luma shift value of every
// sub-block, each row followed by the index of the mask word it falls into.
void get_y_shift_value(int size) {
  // Coordinate table.
  for (int row = 0; row < size; row += 4) {
    for (int col = 0; col < size; col += 4) {
      printf("[%02d,%02d] ", col, row);
    }
    printf("\n");
  }

  printf("\n");

  // Shift table (4x4 granularity).
  for (int row = 0; row < size; row += 4) {
    int word_index = 0;
    for (int col = 0; col < size; col += 4) {
      const int shift = get_y_index_shift(col, row, &word_index);
      printf("%02d ", shift);
    }
    printf("Index %d\n", word_index);
  }
}
|
||||
|
||||
// Prints the chroma shift value returned by get_uv_index_shift() for every
// position of a size x size block walked in steps of 4, one text line per
// row.
void get_uv_shift_value(int size) {
  const int stride = 4;

  for (int row = 0; row < size; row += stride) {
    for (int col = 0; col < size; col += stride) {
      printf("%02d ", get_uv_index_shift(col, row));
    }
    printf("\n");
  }
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
// Chroma shift used by main(): the row is taken as (idy - 2) in units of 4
// with 8 bits per row, the column as idx in units of 4.
// NOTE(review): idy < 2 makes (idy - 2) negative before the right shift;
// main() only passes idy >= 2.
int get_uv_shift(int idx, int idy) {
  const int row = (idy - 2) >> 2;
  const int col = idx >> 2;
  return (row << 3) + col;
}
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
// Build-time switch for the demo in main():
//   AV1 = 1 -> AV1 layout (16 rows of 4x4 units, four-word luma mask)
//   AV1 = 0 -> VP9 layout (8 rows, single-word masks)
#define AV1 1

#if AV1
#define MAX_MIB_SIZE_LOG2 (4)
const int num = 16;  // rows iterated by main()
typedef struct {
  uint64_t bits[4];
} FilterMaskY;
#else
#define MAX_MIB_SIZE_LOG2 (3)
const int num = 8;  // rows iterated by main()
#endif
|
||||
|
||||
|
||||
int main() {
|
||||
get_y_shift_value(64);
|
||||
printf("\n");
|
||||
get_uv_shift_value(64);
|
||||
printf("\n");
|
||||
|
||||
int y_index = 0;
|
||||
const int x = 0;
|
||||
int y;
|
||||
int i;
|
||||
|
||||
// Remaining rows are 1, 2, ..., num - 1
|
||||
// VP9 : 1-7
|
||||
// AV1 : 1-15
|
||||
for (i = 1; i < num; ++i) {
|
||||
#if AV1
|
||||
y = i << 2;
|
||||
int y_shift = get_y_index_shift(x, y, &y_index);
|
||||
int uv_shift = get_uv_shift(x >> 1, y >> 1);
|
||||
|
||||
printf("[%02d,%02d] index=%d y_shift=%02d uv_shift=%02d mask_y ",
|
||||
x, y, y_index, y_shift, uv_shift);
|
||||
|
||||
FilterMaskY mask = {0, 0, 0, 0};
|
||||
int j;
|
||||
for (j = 0; j < y_index; ++j) {
|
||||
mask.bits[j] = 0xffffffffffffffffULL;
|
||||
}
|
||||
mask.bits[y_index] = ((uint64_t)1 << y_shift) - 1;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
printf("0x%016llx ", (unsigned long long int)mask.bits[j]);
|
||||
}
|
||||
|
||||
uint64_t mask_uv = (((uint64_t)1 << (uv_shift + 8)) - 1);
|
||||
if (uv_shift + 8 == 64) mask_uv = 0xffffffffffffffffULL;
|
||||
|
||||
printf("mask_uv 0x%016llx", (unsigned long long int)mask_uv);
|
||||
printf("\n");
|
||||
#else
|
||||
const uint64_t mask_y = (((uint64_t)1 << (i << MAX_MIB_SIZE_LOG2)) - 1);
|
||||
const uint16_t mask_uv =
|
||||
(((uint16_t)1 << (((i + 1) >> 1) << (MAX_MIB_SIZE_LOG2 - 1))) - 1);
|
||||
printf("mask_y=%016llx, mask_uv=%04x\n", (long long unsigned int)mask_y, mask_uv);
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
538
loopfilter/thread_common.c
Normal file
538
loopfilter/thread_common.c
Normal file
@ -0,0 +1,538 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include "./aom_config.h"
|
||||
#include "aom_dsp/aom_dsp_common.h"
|
||||
#include "aom_mem/aom_mem.h"
|
||||
#include "av1/common/entropymode.h"
|
||||
#include "av1/common/thread_common.h"
|
||||
#include "av1/common/reconinter.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
// Acquires `mutex`: tries pthread_mutex_trylock() up to 4000 times and only
// falls back to a blocking pthread_mutex_lock() if none of the attempts
// succeeded.
static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
  const int kMaxTryLocks = 4000;

  for (int attempt = 0; attempt < kMaxTryLocks; ++attempt) {
    // trylock returns 0 once the lock has been taken.
    if (pthread_mutex_trylock(mutex) == 0) return;
  }

  pthread_mutex_lock(mutex);
}
|
||||
#endif // CONFIG_MULTITHREAD
|
||||
|
||||
// Blocks until row r - 1 has progressed far enough for column c of row r:
// waits on the previous row's condition variable while
// c > cur_sb_col[r - 1] - sync_range. Nothing is done for row 0 or for
// columns where c & (sync_range - 1) is non-zero. A no-op without
// CONFIG_MULTITHREAD.
static INLINE void sync_read(AV1LfSync *const lf_sync, int r, int c) {
#if CONFIG_MULTITHREAD
  const int nsync = lf_sync->sync_range;

  if (r == 0 || (c & (nsync - 1)) != 0) return;

  const int above = r - 1;
  pthread_mutex_t *const mutex = &lf_sync->mutex_[above];
  mutex_lock(mutex);

  while (c > lf_sync->cur_sb_col[above] - nsync) {
    pthread_cond_wait(&lf_sync->cond_[above], mutex);
  }
  pthread_mutex_unlock(mutex);
#else
  (void)lf_sync;
  (void)r;
  (void)c;
#endif  // CONFIG_MULTITHREAD
}
|
||||
|
||||
// Publishes the progress of row r after column c has been filtered and
// signals the row's condition variable. To limit signalling, intermediate
// columns only report when c is a multiple of sync_range; the last column
// always reports, with a value (sb_cols + sync_range) that releases any
// waiter. A no-op without CONFIG_MULTITHREAD.
static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
                              const int sb_cols) {
#if CONFIG_MULTITHREAD
  const int nsync = lf_sync->sync_range;
  const int is_last_col = !(c < sb_cols - 1);

  // Only signal when there are enough filtered SB for next row to run.
  if (!is_last_col && (c % nsync) != 0) return;

  const int cur = is_last_col ? sb_cols + nsync : c;

  mutex_lock(&lf_sync->mutex_[r]);

  lf_sync->cur_sb_col[r] = cur;

  pthread_cond_signal(&lf_sync->cond_[r]);
  pthread_mutex_unlock(&lf_sync->mutex_[r]);
#else
  (void)lf_sync;
  (void)r;
  (void)c;
  (void)sb_cols;
#endif  // CONFIG_MULTITHREAD
}
|
||||
|
||||
#if !CONFIG_EXT_PARTITION_TYPES
|
||||
static INLINE enum lf_path get_loop_filter_path(
|
||||
int y_only, struct macroblockd_plane planes[MAX_MB_PLANE]) {
|
||||
if (y_only)
|
||||
return LF_PATH_444;
|
||||
else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
|
||||
return LF_PATH_420;
|
||||
else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
|
||||
return LF_PATH_444;
|
||||
else
|
||||
return LF_PATH_SLOW;
|
||||
}
|
||||
|
||||
static INLINE void loop_filter_block_plane_ver(
|
||||
AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
|
||||
MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
|
||||
LoopFilterMask *lfm) {
|
||||
if (plane == 0) {
|
||||
av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, lfm);
|
||||
} else {
|
||||
switch (path) {
|
||||
case LF_PATH_420:
|
||||
av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, lfm);
|
||||
break;
|
||||
case LF_PATH_444:
|
||||
av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, lfm);
|
||||
break;
|
||||
case LF_PATH_SLOW:
|
||||
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi, mi_row,
|
||||
mi_col, plane);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void loop_filter_block_plane_hor(
|
||||
AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
|
||||
MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
|
||||
LoopFilterMask *lfm) {
|
||||
if (plane == 0) {
|
||||
av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, lfm);
|
||||
} else {
|
||||
switch (path) {
|
||||
case LF_PATH_420:
|
||||
av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, lfm);
|
||||
break;
|
||||
case LF_PATH_444:
|
||||
av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, lfm);
|
||||
break;
|
||||
case LF_PATH_SLOW:
|
||||
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi, mi_row,
|
||||
mi_col, plane);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// Row-based multi-threaded loopfilter hook
|
||||
#if CONFIG_PARALLEL_DEBLOCKING
|
||||
static int loop_filter_ver_row_worker(AV1LfSync *const lf_sync,
|
||||
LFWorkerData *const lf_data) {
|
||||
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
|
||||
int mi_row, mi_col;
|
||||
#if !CONFIG_EXT_PARTITION_TYPES
|
||||
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
|
||||
#endif
|
||||
for (mi_row = lf_data->start; mi_row < lf_data->stop;
|
||||
mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
|
||||
MODE_INFO **const mi =
|
||||
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
|
||||
|
||||
for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
|
||||
mi_col += lf_data->cm->mib_size) {
|
||||
LpfMask lfm;
|
||||
int plane;
|
||||
|
||||
av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
|
||||
lf_data->frame_buffer, mi_row, mi_col,
|
||||
av1_num_planes(lf_data->cm));
|
||||
av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
|
||||
lf_data->cm->mi_stride, &lfm);
|
||||
|
||||
#if CONFIG_EXT_PARTITION_TYPES
|
||||
for (plane = 0; plane < num_planes; ++plane)
|
||||
av1_filter_block_plane_non420_ver(lf_data->cm, &lf_data->planes[plane],
|
||||
mi + mi_col, mi_row, mi_col, plane);
|
||||
#else
|
||||
|
||||
for (plane = 0; plane < num_planes; ++plane)
|
||||
loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane,
|
||||
mi + mi_col, mi_row, mi_col, path, &lfm);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int loop_filter_hor_row_worker(AV1LfSync *const lf_sync,
|
||||
LFWorkerData *const lf_data) {
|
||||
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
|
||||
const int sb_cols =
|
||||
mi_cols_aligned_to_sb(lf_data->cm) >> lf_data->cm->mib_size_log2;
|
||||
int mi_row, mi_col;
|
||||
#if !CONFIG_EXT_PARTITION_TYPES
|
||||
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
|
||||
#endif
|
||||
|
||||
for (mi_row = lf_data->start; mi_row < lf_data->stop;
|
||||
mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
|
||||
MODE_INFO **const mi =
|
||||
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
|
||||
|
||||
for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
|
||||
mi_col += lf_data->cm->mib_size) {
|
||||
const int r = mi_row >> lf_data->cm->mib_size_log2;
|
||||
const int c = mi_col >> lf_data->cm->mib_size_log2;
|
||||
LpfMask lfm;
|
||||
int plane;
|
||||
|
||||
// TODO(wenhao.zhang@intel.com): For better parallelization, reorder
|
||||
// the outer loop to column-based and remove the synchronizations here.
|
||||
sync_read(lf_sync, r, c);
|
||||
|
||||
av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
|
||||
lf_data->frame_buffer, mi_row, mi_col,
|
||||
av1_num_planes(lf_data->cm));
|
||||
av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
|
||||
lf_data->cm->mi_stride, &lfm);
|
||||
#if CONFIG_EXT_PARTITION_TYPES
|
||||
for (plane = 0; plane < num_planes; ++plane)
|
||||
av1_filter_block_plane_non420_hor(lf_data->cm, &lf_data->planes[plane],
|
||||
mi + mi_col, mi_row, mi_col, plane);
|
||||
#else
|
||||
for (plane = 0; plane < num_planes; ++plane)
|
||||
loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane,
|
||||
mi + mi_col, mi_row, mi_col, path, &lfm);
|
||||
#endif
|
||||
sync_write(lf_sync, r, c, sb_cols);
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#else // CONFIG_PARALLEL_DEBLOCKING
|
||||
static int loop_filter_row_worker(AV1LfSync *const lf_sync,
|
||||
LFWorkerData *const lf_data) {
|
||||
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
|
||||
const int sb_cols =
|
||||
mi_cols_aligned_to_sb(lf_data->cm) >> lf_data->cm->mib_size_log2;
|
||||
int mi_row, mi_col;
|
||||
#if !CONFIG_EXT_PARTITION_TYPES
|
||||
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
|
||||
#endif // !CONFIG_EXT_PARTITION_TYPES
|
||||
|
||||
#if CONFIG_EXT_PARTITION
|
||||
printf(
|
||||
"STOPPING: This code has not been modified to work with the "
|
||||
"extended coding unit size experiment");
|
||||
exit(EXIT_FAILURE);
|
||||
#endif // CONFIG_EXT_PARTITION
|
||||
|
||||
for (mi_row = lf_data->start; mi_row < lf_data->stop;
|
||||
mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
|
||||
MODE_INFO **const mi =
|
||||
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
|
||||
|
||||
for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
|
||||
mi_col += lf_data->cm->mib_size) {
|
||||
const int r = mi_row >> lf_data->cm->mib_size_log2;
|
||||
const int c = mi_col >> lf_data->cm->mib_size_log2;
|
||||
#if !CONFIG_EXT_PARTITION_TYPES
|
||||
LpfMask lfm;
|
||||
#endif
|
||||
int plane;
|
||||
|
||||
sync_read(lf_sync, r, c);
|
||||
|
||||
av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
|
||||
lf_data->frame_buffer, mi_row, mi_col);
|
||||
#if CONFIG_EXT_PARTITION_TYPES
|
||||
for (plane = 0; plane < num_planes; ++plane) {
|
||||
av1_filter_block_plane_non420_ver(lf_data->cm, &lf_data->planes[plane],
|
||||
mi + mi_col, mi_row, mi_col, plane);
|
||||
av1_filter_block_plane_non420_hor(lf_data->cm, &lf_data->planes[plane],
|
||||
mi + mi_col, mi_row, mi_col, plane);
|
||||
}
|
||||
#else
|
||||
av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
|
||||
lf_data->cm->mi_stride, &lfm);
|
||||
|
||||
for (plane = 0; plane < num_planes; ++plane) {
|
||||
loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane,
|
||||
mi + mi_col, mi_row, mi_col, path, &lfm);
|
||||
loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane,
|
||||
mi + mi_col, mi_row, mi_col, path, &lfm);
|
||||
}
|
||||
#endif // CONFIG_EXT_PARTITION_TYPES
|
||||
sync_write(lf_sync, r, c, sb_cols);
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#endif // CONFIG_PARALLEL_DEBLOCKING
|
||||
|
||||
// Filters mi rows [start, stop) of |frame| using up to |nworkers| workers.
//
// Worker i begins at superblock row |start| + i * cm->mib_size and the row
// hooks advance by lf_sync->num_workers superblock rows per step. The last
// worker runs via execute(), the others via launch(), and every worker is
// sync()'ed before a pass is considered done.
//
// With CONFIG_PARALLEL_DEBLOCKING two passes are run (all vertical edges of
// the frame, then all horizontal edges); otherwise a single combined pass.
static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                                struct macroblockd_plane *planes, int start,
                                int stop, int y_only, AVxWorker *workers,
                                int nworkers, AV1LfSync *lf_sync) {
#if CONFIG_EXT_PARTITION
  printf(
      "STOPPING: This code has not been modified to work with the "
      "extended coding unit size experiment");
  exit(EXIT_FAILURE);
#endif  // CONFIG_EXT_PARTITION

  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  // Number of superblock rows.
  const int sb_rows = mi_rows_aligned_to_sb(cm) >> cm->mib_size_log2;
  // Decoder may allocate more threads than number of tiles based on user's
  // input.
  //
  // The decoder is capping num_workers because it has been observed that
  // using more threads on the loopfilter than there are cores will hurt
  // performance on Android. This is because the system will only schedule the
  // tile decode workers on cores equal to the number of tile columns. Then if
  // the decoder tries to use more threads for the loopfilter, it will hurt
  // performance because of contention. If the multithreading code changes in
  // the future then the number of workers used by the loopfilter should be
  // revisited.
  const int tile_cols = cm->tile_cols;
  const int num_workers = AOMMIN(nworkers, tile_cols);
  // One hook per filtering pass, in execution order.
#if CONFIG_PARALLEL_DEBLOCKING
  const AVxWorkerHook hooks[] = {
    (AVxWorkerHook)loop_filter_ver_row_worker,  // all vertical edges
    (AVxWorkerHook)loop_filter_hor_row_worker   // all horizontal edges
  };
#else
  const AVxWorkerHook hooks[] = { (AVxWorkerHook)loop_filter_row_worker };
#endif  // CONFIG_PARALLEL_DEBLOCKING
  const int num_passes = (int)(sizeof(hooks) / sizeof(hooks[0]));
  int pass, i;

  // The row hooks step by lf_sync->num_workers while the start rows below
  // are handed out to |num_workers| workers, so the two counts must match
  // exactly. Reallocating only when the count grows would leave a stale,
  // larger stride after the count shrinks and superblock rows would be
  // skipped.
  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
      num_workers != lf_sync->num_workers) {
    av1_loop_filter_dealloc(lf_sync);
    av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
  }

  for (pass = 0; pass < num_passes; ++pass) {
    // Initialize cur_sb_col to -1 for all SB rows.
    memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);

    // Set up loopfilter thread data and start the workers.
    for (i = 0; i < num_workers; ++i) {
      AVxWorker *const worker = &workers[i];
      LFWorkerData *const lf_data = &lf_sync->lfdata[i];

      worker->hook = hooks[pass];
      worker->data1 = lf_sync;
      worker->data2 = lf_data;

      // Loopfilter data
      av1_loop_filter_data_reset(lf_data, frame, cm, planes);
      lf_data->start = start + i * cm->mib_size;
      lf_data->stop = stop;
      lf_data->y_only = y_only;

      // Start loopfiltering; the last worker goes through execute().
      if (i == num_workers - 1) {
        winterface->execute(worker);
      } else {
        winterface->launch(worker);
      }
    }

    // Wait till all rows are finished
    for (i = 0; i < num_workers; ++i) {
      winterface->sync(&workers[i]);
    }
  }
}
|
||||
|
||||
// Multi-threaded loop filtering of |frame|.
//
// Does nothing when |frame_filter_level| is 0. Otherwise initialises the
// per-frame loop filter state and filters either all mi rows, or -- when
// |partial_frame| is set and the frame has more than 8 mi rows -- a band of
// max(mi_rows / 8, 8) rows starting at mi_rows / 2 rounded down to a multiple
// of 8.
void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                              struct macroblockd_plane *planes,
                              int frame_filter_level,
#if CONFIG_LOOPFILTER_LEVEL
                              int frame_filter_level_r,
#endif
                              int y_only, int partial_frame, AVxWorker *workers,
                              int num_workers, AV1LfSync *lf_sync) {
  int first_mi_row, num_mi_rows;

  if (frame_filter_level == 0) return;

  if (partial_frame && cm->mi_rows > 8) {
    first_mi_row = (cm->mi_rows >> 1) & 0xfffffff8;
    num_mi_rows = AOMMAX(cm->mi_rows / 8, 8);
  } else {
    first_mi_row = 0;
    num_mi_rows = cm->mi_rows;
  }

#if CONFIG_LOOPFILTER_LEVEL
  av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level_r,
                             y_only);
#else
  av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level);
#endif  // CONFIG_LOOPFILTER_LEVEL

  loop_filter_rows_mt(frame, cm, planes, first_mi_row,
                      first_mi_row + num_mi_rows, y_only, workers, num_workers,
                      lf_sync);
}
|
||||
|
||||
// Set up nsync by width.
|
||||
static INLINE int get_sync_range(int width) {
|
||||
// nsync numbers are picked by testing. For example, for 4k
|
||||
// video, using 4 gives best performance.
|
||||
if (width < 640)
|
||||
return 1;
|
||||
else if (width <= 1280)
|
||||
return 2;
|
||||
else if (width <= 4096)
|
||||
return 4;
|
||||
else
|
||||
return 8;
|
||||
}
|
||||
|
||||
// Allocate memory for lf row synchronization
|
||||
void av1_loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows,
|
||||
int width, int num_workers) {
|
||||
lf_sync->rows = rows;
|
||||
#if CONFIG_MULTITHREAD
|
||||
{
|
||||
int i;
|
||||
|
||||
CHECK_MEM_ERROR(cm, lf_sync->mutex_,
|
||||
aom_malloc(sizeof(*lf_sync->mutex_) * rows));
|
||||
if (lf_sync->mutex_) {
|
||||
for (i = 0; i < rows; ++i) {
|
||||
pthread_mutex_init(&lf_sync->mutex_[i], NULL);
|
||||
}
|
||||
}
|
||||
|
||||
CHECK_MEM_ERROR(cm, lf_sync->cond_,
|
||||
aom_malloc(sizeof(*lf_sync->cond_) * rows));
|
||||
if (lf_sync->cond_) {
|
||||
for (i = 0; i < rows; ++i) {
|
||||
pthread_cond_init(&lf_sync->cond_[i], NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_MULTITHREAD
|
||||
|
||||
CHECK_MEM_ERROR(cm, lf_sync->lfdata,
|
||||
aom_malloc(num_workers * sizeof(*lf_sync->lfdata)));
|
||||
lf_sync->num_workers = num_workers;
|
||||
|
||||
CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
|
||||
aom_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
|
||||
|
||||
// Set up nsync.
|
||||
lf_sync->sync_range = get_sync_range(width);
|
||||
}
|
||||
|
||||
// Deallocate lf synchronization related mutex and data
|
||||
void av1_loop_filter_dealloc(AV1LfSync *lf_sync) {
|
||||
if (lf_sync != NULL) {
|
||||
#if CONFIG_MULTITHREAD
|
||||
int i;
|
||||
|
||||
if (lf_sync->mutex_ != NULL) {
|
||||
for (i = 0; i < lf_sync->rows; ++i) {
|
||||
pthread_mutex_destroy(&lf_sync->mutex_[i]);
|
||||
}
|
||||
aom_free(lf_sync->mutex_);
|
||||
}
|
||||
if (lf_sync->cond_ != NULL) {
|
||||
for (i = 0; i < lf_sync->rows; ++i) {
|
||||
pthread_cond_destroy(&lf_sync->cond_[i]);
|
||||
}
|
||||
aom_free(lf_sync->cond_);
|
||||
}
|
||||
#endif // CONFIG_MULTITHREAD
|
||||
aom_free(lf_sync->lfdata);
|
||||
aom_free(lf_sync->cur_sb_col);
|
||||
// clear the structure as the source of this call may be a resize in which
|
||||
// case this call will be followed by an _alloc() which may fail.
|
||||
av1_zero(*lf_sync);
|
||||
}
|
||||
}
|
||||
|
||||
// Accumulate frame counts. FRAME_COUNTS consist solely of 'unsigned int'
|
||||
// members, so we treat it as an array, and sum over the whole length.
|
||||
void av1_accumulate_frame_counts(FRAME_COUNTS *acc_counts,
|
||||
FRAME_COUNTS *counts) {
|
||||
unsigned int *const acc = (unsigned int *)acc_counts;
|
||||
const unsigned int *const cnt = (unsigned int *)counts;
|
||||
|
||||
const unsigned int n_counts = sizeof(FRAME_COUNTS) / sizeof(unsigned int);
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < n_counts; i++) acc[i] += cnt[i];
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user