Port commits related to clpf and qm experiments

Manually cherry-picked following commits from AOMedia git repository:
bb2727c Sort includess for "clpf.h"
c297fd0 Add quantisation matrix range parameters.
0527894 Add encoder option and signaling for quant matrix control.
4106232 Turn off trellis coding for quantization matrices.
4017fca Modify tests to allow quantization matrices.
1c122c2 Add quant and dequant functions for new quant matrices.
95a8999 Enable CLPF
f72782b Fix a build issue
73bae50 Add quantisation matrices and selection functions
33208d2 Added support for constrained low pass filter (CLPF)

Change-Id: I60fc1ee1ac40e6b9d1d00affd97547ee5d5dd6be
This commit is contained in:
Yaowu Xu
2016-08-11 09:39:47 -07:00
parent ac917ec262
commit 0818a7c828
28 changed files with 12166 additions and 84 deletions

3
configure vendored
View File

@@ -252,6 +252,7 @@ HAVE_LIST="
EXPERIMENT_LIST="
fp_mb_stats
emulate_hardware
clpf
var_tx
rect_tx
ref_mv
@@ -328,6 +329,7 @@ CONFIG_LIST="
better_hw_compatibility
experimental
size_limit
aom_qm
${EXPERIMENT_LIST}
"
CMDLINE_SELECT="
@@ -386,6 +388,7 @@ CMDLINE_SELECT="
better_hw_compatibility
vp9_highbitdepth
experimental
aom_qm
"
process_cmdline() {

View File

@@ -26,6 +26,7 @@
#include "vpx_mem/vpx_mem.h"
namespace {
#if !CONFIG_AOM_QM
const int kNumBlocks = 25;
const int kNumBlockEntries = 16;
@@ -199,4 +200,5 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp8_fast_quantize_b_msa, &vp8_fast_quantize_b_c),
make_tuple(&vp8_regular_quantize_b_msa, &vp8_regular_quantize_b_c)));
#endif // HAVE_MSA
#endif // CONFIG_AOM_QM
} // namespace

View File

@@ -295,11 +295,17 @@ typedef struct macroblockd_plane {
// log2 of n4_w, n4_h
uint8_t n4_wl, n4_hl;
#if CONFIG_AOM_QM
const qm_val_t *seg_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
#endif
// encoder
const int16_t *dequant;
#if CONFIG_NEW_QUANT
const dequant_val_type_nuq *dequant_val_nuq[QUANT_PROFILES];
#endif // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
const qm_val_t *seg_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
#endif
} MACROBLOCKD_PLANE;
#define BLOCK_OFFSET(x, i) ((x) + (i)*16)

99
vp10/common/clpf.c Normal file
View File

@@ -0,0 +1,99 @@
/*
Copyright (c) 2016 Cisco Systems
(Replace with proper AOM header)
*/
#include "vp10/common/clpf.h"
// Apply the filter on a single block
static void clpf_block(const uint8_t *src, uint8_t *dst, int sstride,
int dstride, int has_top, int has_left, int has_bottom,
int has_right, int width, int height) {
int x, y;
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
int X = src[(y + 0) * sstride + x + 0];
int A = has_top ? src[(y - 1) * sstride + x + 0] : X;
int B = has_left ? src[(y + 0) * sstride + x - 1] : X;
int C = has_right ? src[(y + 0) * sstride + x + 1] : X;
int D = has_bottom ? src[(y + 1) * sstride + x + 0] : X;
int delta = ((A > X) + (B > X) + (C > X) + (D > X) > 2) -
((A < X) + (B < X) + (C < X) + (D < X) > 2);
dst[y * dstride + x] = X + delta;
}
}
}
#define BS (MI_SIZE * MAX_MIB_SIZE)
// Iterate over blocks within a superblock
static void vp10_clpf_sb(const YV12_BUFFER_CONFIG *frame_buffer,
const VP10_COMMON *cm, MACROBLOCKD *xd,
MODE_INFO *const *mi_8x8, int xpos, int ypos) {
// Temporary buffer (to allow SIMD parallelism)
uint8_t buf_unaligned[BS * BS + 15];
uint8_t *buf = (uint8_t *)(((intptr_t)buf_unaligned + 15) & ~15);
int x, y, p;
for (p = 0; p < (CLPF_FILTER_ALL_PLANES ? MAX_MB_PLANE : 1); p++) {
for (y = 0; y < MAX_MIB_SIZE && ypos + y < cm->mi_rows; y++) {
for (x = 0; x < MAX_MIB_SIZE && xpos + x < cm->mi_cols; x++) {
const MB_MODE_INFO *mbmi =
&mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
// Do not filter if there is no residual
if (!mbmi->skip) {
// Do not filter frame edges
int has_top = ypos + y > 0;
int has_left = xpos + x > 0;
int has_bottom = ypos + y < cm->mi_rows - 1;
int has_right = xpos + x < cm->mi_cols - 1;
#if CLPF_ALLOW_BLOCK_PARALLELISM
// Do not filter superblock edges
has_top &= !!y;
has_left &= !!x;
has_bottom &= y != MAX_MIB_SIZE - 1;
has_right &= x != MAX_MIB_SIZE - 1;
#endif
vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
clpf_block(
xd->plane[p].dst.buf, CLPF_ALLOW_PIXEL_PARALLELISM
? buf + y * MI_SIZE * BS + x * MI_SIZE
: xd->plane[p].dst.buf,
xd->plane[p].dst.stride,
CLPF_ALLOW_PIXEL_PARALLELISM ? BS : xd->plane[p].dst.stride,
has_top, has_left, has_bottom, has_right,
MI_SIZE >> xd->plane[p].subsampling_x,
MI_SIZE >> xd->plane[p].subsampling_y);
}
}
}
#if CLPF_ALLOW_PIXEL_PARALLELISM
for (y = 0; y < MAX_MIB_SIZE && ypos + y < cm->mi_rows; y++) {
for (x = 0; x < MAX_MIB_SIZE && xpos + x < cm->mi_cols; x++) {
const MB_MODE_INFO *mbmi =
&mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
if (!mbmi->skip) {
int i = 0;
for (i = 0; i<MI_SIZE>> xd->plane[p].subsampling_y; i++)
memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
buf + (y * MI_SIZE + i) * BS + x * MI_SIZE,
MI_SIZE >> xd->plane[p].subsampling_x);
}
}
}
#endif
}
}
// Iterate over the superblocks of an entire frame
void vp10_clpf_frame(const YV12_BUFFER_CONFIG *frame, const VP10_COMMON *cm,
MACROBLOCKD *xd) {
int x, y;
for (y = 0; y < cm->mi_rows; y += MAX_MIB_SIZE)
for (x = 0; x < cm->mi_cols; x += MAX_MIB_SIZE)
vp10_clpf_sb(frame, cm, xd, cm->mi_grid_visible, x, y);
}

22
vp10/common/clpf.h Normal file
View File

@@ -0,0 +1,22 @@
/*
Copyright (c) 2016, Cisco Systems
(Replace with proper AOM header)
*/
#ifndef VP10_COMMON_CLPF_H_
#define VP10_COMMON_CLPF_H_
#include "vp10/common/reconinter.h"
// Configuration
#define CLPF_ALLOW_PIXEL_PARALLELISM \
1 // 1 = SIMD friendly (adds a buffer requirement)
#define CLPF_ALLOW_BLOCK_PARALLELISM \
0 // 1 = MT friendly (degrades quality slighty)
#define CLPF_FILTER_ALL_PLANES \
0 // 1 = filter both luma and chroma, 0 = filter only luma
void vp10_clpf_frame(const YV12_BUFFER_CONFIG *frame, const VP10_COMMON *cm,
MACROBLOCKD *xd);
#endif

View File

@@ -144,6 +144,9 @@ typedef struct VP10Common {
// Marks if we need to use 16bit frame buffers (1: yes, 0: no).
int use_highbitdepth;
#endif
#if CONFIG_CLPF
int clpf;
#endif
YV12_BUFFER_CONFIG *frame_to_show;
RefCntBuffer *prev_frame;
@@ -214,6 +217,23 @@ typedef struct VP10Common {
int uv_ac_delta_q;
int16_t y_dequant[MAX_SEGMENTS][2];
int16_t uv_dequant[MAX_SEGMENTS][2];
#if CONFIG_AOM_QM
// Global quant matrix tables
qm_val_t *giqmatrix[NUM_QM_LEVELS][2][2][TX_SIZES];
qm_val_t *gqmatrix[NUM_QM_LEVELS][2][2][TX_SIZES];
// Local quant matrix tables for each frame
qm_val_t *y_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
qm_val_t *uv_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
// Encoder
qm_val_t *y_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
qm_val_t *uv_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
int using_qmatrix;
int min_qmlevel;
int max_qmlevel;
#endif
#if CONFIG_NEW_QUANT
dequant_val_type_nuq y_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
dequant_val_type_nuq uv_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
@@ -430,12 +450,20 @@ static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd,
xd->above_context[i] = cm->above_context[i];
if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
#if CONFIG_AOM_QM
memcpy(xd->plane[i].seg_iqmatrix, cm->y_iqmatrix, sizeof(cm->y_iqmatrix));
#endif
#if CONFIG_NEW_QUANT
memcpy(xd->plane[i].seg_dequant_nuq, cm->y_dequant_nuq,
sizeof(cm->y_dequant_nuq));
#endif
} else {
memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant));
#if CONFIG_AOM_QM
memcpy(xd->plane[i].seg_iqmatrix, cm->uv_iqmatrix,
sizeof(cm->uv_iqmatrix));
#endif
#if CONFIG_NEW_QUANT
memcpy(xd->plane[i].seg_dequant_nuq, cm->uv_dequant_nuq,
sizeof(cm->uv_dequant_nuq));

File diff suppressed because it is too large Load Diff

View File

@@ -13,6 +13,7 @@
#include "vpx/vpx_codec.h"
#include "vp10/common/seg_common.h"
#include "vp10/common/enums.h"
#ifdef __cplusplus
extern "C" {
@@ -22,12 +23,38 @@ extern "C" {
#define MAXQ 255
#define QINDEX_RANGE (MAXQ - MINQ + 1)
#define QINDEX_BITS 8
#if CONFIG_AOM_QM
// Total number of QM sets stored
#define QM_LEVEL_BITS 4
#define NUM_QM_LEVELS (1 << QM_LEVEL_BITS)
/* Offset into the list of QMs. Actual number of levels used is
(NUM_QM_LEVELS-AOM_QM_OFFSET)
Lower value of AOM_QM_OFFSET implies more heavily weighted matrices.*/
#define DEFAULT_QM_FIRST (NUM_QM_LEVELS / 2)
#define DEFAULT_QM_LAST (NUM_QM_LEVELS - 1)
#endif
struct VP10Common;
int16_t vp10_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
int16_t vp10_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
int vp10_get_qindex(const struct segmentation *seg, int segment_id,
int base_qindex);
#if CONFIG_AOM_QM
// Reduce the large number of quantizers to a smaller number of levels for which
// different matrices may be defined
static inline int aom_get_qmlevel(int qindex, int first, int last) {
int qmlevel = (qindex * (last + 1 - first) + QINDEX_RANGE / 2) / QINDEX_RANGE;
qmlevel = VPXMIN(qmlevel + first, NUM_QM_LEVELS - 1);
return qmlevel;
}
void aom_qm_init(struct VP10Common *cm);
qm_val_t *aom_iqmatrix(struct VP10Common *cm, int qindex, int comp,
int log2sizem2, int is_intra);
qm_val_t *aom_qmatrix(struct VP10Common *cm, int qindex, int comp,
int log2sizem2, int is_intra);
#endif
#if CONFIG_NEW_QUANT

View File

@@ -462,35 +462,64 @@ if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
# ENCODEMB INVOKE
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error/;
if (vpx_config("CONFIG_AOM_QM") eq "yes") {
if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error/;
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp/;
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/;
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant/;
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
specialize qw/vp10_fdct8x8_quant/;
} else {
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
}
} else {
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error sse2 avx2 msa/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error/;
add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
specialize qw/vp10_block_error_fp neon sse2/;
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp/;
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/;
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant/;
} else {
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error sse2 avx2 msa/;
add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
specialize qw/vp10_block_error_fp neon sse2/;
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
}
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
}
# fdct functions
@@ -817,11 +846,17 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/int64_t vp10_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/vp10_highbd_block_error sse2/;
add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/vp10_highbd_quantize_fp sse4_1/;
if (vpx_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void vp10_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/vp10_highbd_quantize_b/;
add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
} else {
add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/vp10_highbd_quantize_fp sse4_1/;
add_proto qw/void vp10_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/vp10_highbd_quantize_b/;
}
# fdct functions
add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

View File

@@ -25,6 +25,9 @@
#include "vpx_util/vpx_thread.h"
#include "vp10/common/alloccommon.h"
#if CONFIG_CLPF
#include "vp10/common/clpf.h"
#endif
#include "vp10/common/common.h"
#include "vp10/common/entropy.h"
#include "vp10/common/entropymode.h"
@@ -1942,6 +1945,12 @@ static void setup_loopfilter(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
}
}
#if CONFIG_CLPF
static void setup_clpf(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
cm->clpf = vpx_rb_read_literal(rb, 1);
}
#endif
static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) {
return vpx_rb_read_bit(rb) ? vpx_rb_read_inv_signed_literal(rb, 6) : 0;
}
@@ -1953,16 +1962,34 @@ static void setup_quantization(VP10_COMMON *const cm,
cm->uv_dc_delta_q = read_delta_q(rb);
cm->uv_ac_delta_q = read_delta_q(rb);
cm->dequant_bit_depth = cm->bit_depth;
#if CONFIG_AOM_QM
cm->using_qmatrix = vpx_rb_read_bit(rb);
if (cm->using_qmatrix) {
cm->min_qmlevel = vpx_rb_read_literal(rb, QM_LEVEL_BITS);
cm->max_qmlevel = vpx_rb_read_literal(rb, QM_LEVEL_BITS);
} else {
cm->min_qmlevel = 0;
cm->max_qmlevel = 0;
}
#endif
}
static void setup_segmentation_dequant(VP10_COMMON *const cm) {
// Build y/uv dequant values based on segmentation.
// Build y/uv dequant values based on segmentation.
int i = 0;
#if CONFIG_AOM_QM
int lossless;
int j = 0;
int qmlevel;
int using_qm = cm->using_qmatrix;
int minqm = cm->min_qmlevel;
int maxqm = cm->max_qmlevel;
#endif
#if CONFIG_NEW_QUANT
int b;
int dq;
#endif // CONFIG_NEW_QUANT
if (cm->seg.enabled) {
int i;
for (i = 0; i < MAX_SEGMENTS; ++i) {
const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
cm->y_dequant[i][0] =
@@ -1972,6 +1999,21 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
cm->uv_dequant[i][1] =
vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
#if CONFIG_AOM_QM
lossless = qindex == 0 && cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
// NB: depends on base index so there is only 1 set per frame
// No quant weighting when lossless or signalled not using QM
qmlevel = (lossless || using_qm == 0)
? NUM_QM_LEVELS - 1
: aom_get_qmlevel(cm->base_qindex, minqm, maxqm);
for (j = 0; j < TX_SIZES; ++j) {
cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 0, j, 1);
cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 0, j, 0);
cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 1, j, 1);
cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 1, j, 0);
}
#endif // CONFIG_AOM_QM
#if CONFIG_NEW_QUANT
for (dq = 0; dq < QUANT_PROFILES; dq++) {
for (b = 0; b < COEF_BANDS; ++b) {
@@ -1994,6 +2036,20 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
cm->uv_dequant[0][1] =
vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
#if CONFIG_AOM_QM
lossless = qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
// No quant weighting when lossless or signalled not using QM
qmlevel = (lossless || using_qm == 0)
? NUM_QM_LEVELS - 1
: aom_get_qmlevel(cm->base_qindex, minqm, maxqm);
for (j = 0; j < TX_SIZES; ++j) {
cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 0, j, 1);
cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 0, j, 0);
cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 1, j, 1);
cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 1, j, 0);
}
#endif
#if CONFIG_NEW_QUANT
for (dq = 0; dq < QUANT_PROFILES; dq++) {
for (b = 0; b < COEF_BANDS; ++b) {
@@ -2646,6 +2702,10 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi, const uint8_t *data,
winterface->execute(&pbi->lf_worker);
}
#endif // CONFIG_VAR_TX
#if CONFIG_CLPF
if (cm->clpf && !cm->skip_loop_filter)
vp10_clpf_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
#endif
if (cm->frame_parallel_decode)
vp10_frameworker_broadcast(pbi->cur_buf, INT_MAX);
@@ -3179,6 +3239,9 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
#endif // CONFIG_EXT_PARTITION
setup_loopfilter(cm, rb);
#if CONFIG_CLPF
setup_clpf(cm, rb);
#endif
#if CONFIG_LOOP_RESTORATION
setup_restoration(cm, rb);
#endif // CONFIG_LOOP_RESTORATION

View File

@@ -112,6 +112,10 @@ VP10Decoder *vp10_decoder_create(BufferPool *const pool) {
cm->setup_mi = vp10_dec_setup_mi;
vp10_loop_filter_init(cm);
#if CONFIG_AOM_QM
aom_qm_init(cm);
#endif
#if CONFIG_LOOP_RESTORATION
vp10_loop_restoration_precal();
#endif // CONFIG_LOOP_RESTORATION

View File

@@ -43,6 +43,13 @@ static INLINE int read_coeff(const vpx_prob *probs, int n, vp10_reader *r) {
return val;
}
#if CONFIG_AOM_QM
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, TX_TYPE tx_type,
const int16_t *dq, int ctx, const int16_t *scan,
const int16_t *nb, vp10_reader *r,
const qm_val_t *iqm[2][TX_SIZES])
#else
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, TX_TYPE tx_type,
const int16_t *dq,
@@ -50,11 +57,16 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
dequant_val_type_nuq *dq_val,
#endif // CONFIG_NEW_QUANT
int ctx, const int16_t *scan, const int16_t *nb,
vp10_reader *r) {
vp10_reader *r)
#endif
{
FRAME_COUNTS *counts = xd->counts;
const int max_eob = get_tx2d_size(tx_size);
const FRAME_CONTEXT *const fc = xd->fc;
const int ref = is_inter_block(&xd->mi[0]->mbmi);
#if CONFIG_AOM_QM
const qm_val_t *iqmatrix = iqm[!ref][tx_size];
#endif
int band, c = 0;
const int tx_size_ctx = txsize_sqr_map[tx_size];
const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
@@ -197,9 +209,14 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
}
}
#if CONFIG_NEW_QUANT
v = vp10_dequant_abscoeff_nuq(val, dqv, dqv_val);
v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
#else
#if CONFIG_AOM_QM
dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
v = (val * dqv) >> dq_shift;
#endif // CONFIG_NEW_QUANT
@@ -489,12 +506,18 @@ int vp10_decode_block_tokens(MACROBLOCKD *const xd, int plane,
#endif // CONFIG_NEW_QUANT
#if !CONFIG_ANS
#if CONFIG_AOM_QM
const int eob =
decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, dequant,
ctx, sc->scan, sc->neighbors, r, pd->seg_iqmatrix[seg_id]);
#else
const int eob =
decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, dequant,
#if CONFIG_NEW_QUANT
pd->seg_dequant_nuq[seg_id][dq],
#endif // CONFIG_NEW_QUANT
ctx, sc->scan, sc->neighbors, r);
#endif // CONFIG_AOM_QM
#else
const int eob = decode_coefs_ans(xd, pd->plane_type, pd->dqcoeff, tx_size,
tx_type, dequant,

View File

@@ -20,6 +20,9 @@
#include "vpx_ports/system_state.h"
#include "vpx_util/debug_util.h"
#if CONFIG_CLPF
#include "vp10/common/clpf.h"
#endif
#include "vp10/common/entropy.h"
#include "vp10/common/entropymode.h"
#include "vp10/common/entropymv.h"
@@ -2437,6 +2440,13 @@ static void encode_loopfilter(VP10_COMMON *cm,
}
}
#if CONFIG_CLPF
static void encode_clpf(const VP10_COMMON *cm,
struct vpx_write_bit_buffer *wb) {
vpx_wb_write_literal(wb, cm->clpf, 1);
}
#endif
static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) {
if (delta_q != 0) {
vpx_wb_write_bit(wb, 1);
@@ -2452,6 +2462,13 @@ static void encode_quantization(const VP10_COMMON *const cm,
write_delta_q(wb, cm->y_dc_delta_q);
write_delta_q(wb, cm->uv_dc_delta_q);
write_delta_q(wb, cm->uv_ac_delta_q);
#if CONFIG_AOM_QM
vpx_wb_write_bit(wb, cm->using_qmatrix);
if (cm->using_qmatrix) {
vpx_wb_write_literal(wb, cm->min_qmlevel, QM_LEVEL_BITS);
vpx_wb_write_literal(wb, cm->max_qmlevel, QM_LEVEL_BITS);
}
#endif
}
static void encode_segmentation(VP10_COMMON *cm, MACROBLOCKD *xd,
@@ -3083,6 +3100,9 @@ static void write_uncompressed_header(VP10_COMP *cpi,
#endif // CONFIG_EXT_PARTITION
encode_loopfilter(cm, wb);
#if CONFIG_CLPF
encode_clpf(cm, wb);
#endif
#if CONFIG_LOOP_RESTORATION
encode_restoration(cm, wb);
#endif // CONFIG_LOOP_RESTORATION

View File

@@ -1561,7 +1561,12 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
const int16_t *scan, const int16_t *iscan
#if CONFIG_AOM_QM
,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
#endif
) {
int eob = -1;
int i, j;
@@ -1647,16 +1652,29 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
int tmp32;
#if CONFIG_AOM_QM
tmp32 = (tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS);
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
#else
tmp32 = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
#endif
if (tmp) eob = i;
if (tmp32) eob = i;
}
}
*eob_ptr = eob + 1;

View File

@@ -88,6 +88,11 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
const int16_t *const scan = so->scan;
const int16_t *const nb = so->neighbors;
#if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id;
int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
#endif
const int shift = get_tx_scale(xd, tx_type, tx_size);
#if CONFIG_NEW_QUANT
int dq = get_dq_profile_from_ctx(ctx);
@@ -142,6 +147,9 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
int base_bits, dx;
int64_t d2;
const int rc = scan[i];
#if CONFIG_AOM_QM
int iwt = iqmatrix[rc];
#endif
int x = qcoeff[rc];
next_shortcut = shortcut;
@@ -200,10 +208,18 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
(vp10_dequant_abscoeff_nuq(abs(x) - 1, dequant_ptr[rc != 0],
dequant_val[band_translate[i]]) <
(abs(coeff[rc]) << shift)));
#else // CONFIG_NEW_QUANT
#else // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
if ((abs(x) * dequant_ptr[rc != 0] * iwt >
((abs(coeff[rc]) << shift) << AOM_QM_BITS)) &&
(abs(x) * dequant_ptr[rc != 0] * iwt <
(((abs(coeff[rc]) << shift) + dequant_ptr[rc != 0])
<< AOM_QM_BITS)))
#else
if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
(abs(x) * dequant_ptr[rc != 0] <
(abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]))
#endif // CONFIG_AOM_QM
shortcut = 1;
else
shortcut = 0;
@@ -366,6 +382,11 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
for (i = next; i < eob; i = next) {
const int x = tokens[i][best].qc;
const int rc = scan[i];
#if CONFIG_AOM_QM
const int iwt = iqmatrix[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
#endif
if (x) final_eob = i;
qcoeff[rc] = x;
@@ -430,6 +451,12 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
#if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id;
int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size];
const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
#endif
const int16_t *src_diff;
const int tx2d_size = get_tx2d_size(tx_size);
@@ -452,7 +479,12 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
if (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT) {
if (LIKELY(!x->skip_block)) {
quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD](
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam
#if CONFIG_AOM_QM
,
qmatrix, iqmatrix
#endif // CONFIG_AOM_QM
);
} else {
vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
}
@@ -465,7 +497,12 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
if (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT) {
if (LIKELY(!x->skip_block)) {
quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD](
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam
#if CONFIG_AOM_QM
,
qmatrix, iqmatrix
#endif // CONFIG_AOM_QM
);
} else {
vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
}

View File

@@ -15,6 +15,9 @@
#include "./vpx_config.h"
#include "vp10/common/alloccommon.h"
#if CONFIG_CLPF
#include "vp10/common/clpf.h"
#endif
#include "vp10/common/filter.h"
#include "vp10/common/idct.h"
#include "vp10/common/reconinter.h"
@@ -2435,6 +2438,9 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
* vp10_init_quantizer() for every frame.
*/
vp10_init_quantizer(cpi);
#if CONFIG_AOM_QM
aom_qm_init(cm);
#endif
vp10_loop_filter_init(cm);
#if CONFIG_LOOP_RESTORATION
@@ -3337,6 +3343,65 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) {
vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
#endif
}
#if CONFIG_CLPF
cm->clpf = 0;
if (!is_lossless_requested(&cpi->oxcf)) {
// Test CLPF
int i, hq = 1;
uint64_t before, after;
// TODO(yaowu): investigate per-segment CLPF decision and
// an optimal threshold, use 80 for now.
for (i = 0; i < MAX_SEGMENTS; i++)
hq &= vp10_get_qindex(&cm->seg, i, cm->base_qindex) < 80;
if (!hq) { // Don't try filter if the entire image is nearly losslessly
// encoded
#if CLPF_FILTER_ALL_PLANES
vpx_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf);
before =
get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
cpi->Source->y_crop_width, cpi->Source->y_crop_height) +
get_sse(cpi->Source->u_buffer, cpi->Source->uv_stride,
cm->frame_to_show->u_buffer, cm->frame_to_show->uv_stride,
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height) +
get_sse(cpi->Source->v_buffer, cpi->Source->uv_stride,
cm->frame_to_show->v_buffer, cm->frame_to_show->uv_stride,
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height);
vp10_clpf_frame(cm->frame_to_show, cm, xd);
after = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
cpi->Source->y_crop_width, cpi->Source->y_crop_height) +
get_sse(cpi->Source->u_buffer, cpi->Source->uv_stride,
cm->frame_to_show->u_buffer, cm->frame_to_show->uv_stride,
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height) +
get_sse(cpi->Source->v_buffer, cpi->Source->uv_stride,
cm->frame_to_show->v_buffer, cm->frame_to_show->uv_stride,
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height);
#else
vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
before = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
cpi->Source->y_crop_width, cpi->Source->y_crop_height);
vp10_clpf_frame(cm->frame_to_show, cm, xd);
after = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
cpi->Source->y_crop_width, cpi->Source->y_crop_height);
#endif
if (before < after) {
// No improvement, restore original
#if CLPF_FILTER_ALL_PLANES
vpx_yv12_copy_frame(&cpi->last_frame_uf, cm->frame_to_show);
#else
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
#endif
} else {
cm->clpf = 1;
}
}
}
#endif
#if CONFIG_LOOP_RESTORATION
if (cm->rst_info.restoration_type != RESTORE_NONE) {
vp10_loop_restoration_init(&cm->rst_internal, &cm->rst_info,
@@ -5259,6 +5324,12 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
cpi->scaled_ref_idx[i] = INVALID_IDX;
}
#if CONFIG_AOM_QM
cm->using_qmatrix = cpi->oxcf.using_qm;
cm->min_qmlevel = cpi->oxcf.qm_minlevel;
cm->max_qmlevel = cpi->oxcf.qm_maxlevel;
#endif
if (oxcf->pass == 1) {
cpi->td.mb.e_mbd.lossless[0] = is_lossless_requested(oxcf);
vp10_first_pass(cpi, source);

View File

@@ -193,6 +193,11 @@ typedef struct VP10EncoderConfig {
int best_allowed_q;
int cq_level;
AQ_MODE aq_mode; // Adaptive Quantization mode
#if CONFIG_AOM_QM
int using_qm;
int qm_minlevel;
int qm_maxlevel;
#endif
// Internal frame size scaling.
RESIZE_TYPE resize_mode;

View File

@@ -756,7 +756,12 @@ void vp10_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan) {
const int16_t *iscan
#if CONFIG_AOM_QM
,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
#endif
) {
int i, eob = -1;
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
@@ -773,28 +778,47 @@ void vp10_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
int tmp32;
#if CONFIG_AOM_QM
tmp32 = (tmp * wt * quant_ptr[rc != 0]) >> (16 + AOM_QM_BITS);
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
#else
tmp32 = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
#endif
if (tmp) eob = i;
if (tmp32) eob = i;
}
}
*eob_ptr = eob + 1;
}
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_quantize_fp_c(
const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, int log_scale) {
void vp10_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan,
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
#endif
int log_scale) {
int i;
int eob = -1;
const int scale = 1 << log_scale;
@@ -814,13 +838,27 @@ void vp10_highbd_quantize_fp_c(
for (i = 0; i < count; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + round_ptr[rc != 0];
#if CONFIG_AOM_QM
const uint32_t abs_qcoeff =
(uint32_t)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale;
#else
const uint32_t abs_qcoeff =
(uint32_t)((tmp * quant_ptr[rc != 0]) >> shift);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / scale;
#endif
if (abs_qcoeff) eob = i;
}
}
@@ -838,7 +876,12 @@ void vp10_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
const int16_t *scan, const int16_t *iscan
#if CONFIG_AOM_QM
,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
#endif
) {
int i, eob = -1;
(void)zbin_ptr;
(void)quant_shift_ptr;
@@ -851,31 +894,56 @@ void vp10_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
int64_t tmp = 0;
#endif
const int coeff_sign = (coeff >> 31);
int tmp = 0;
int tmp32 = 0;
int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
#if CONFIG_AOM_QM
if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - 2))) {
#else
if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
#endif
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
tmp = (abs_coeff * quant_ptr[rc != 0]) >> 15;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
#if CONFIG_AOM_QM
tmp = abs_coeff * wt;
tmp32 = (int)(tmp * quant_ptr[rc != 0]) >> (AOM_QM_BITS + 15);
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
#else
tmp32 = (abs_coeff * quant_ptr[rc != 0]) >> 15;
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant_ptr[rc != 0]) / 2;
#endif
}
if (tmp) eob = i;
if (tmp32) eob = i;
}
}
*eob_ptr = eob + 1;
}
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_quantize_b_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, int log_scale) {
void vp10_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan,
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
#endif
int log_scale) {
int i, non_zero_count = (int)n_coeffs, eob = -1;
int zbins[2] = { zbin_ptr[0], zbin_ptr[1] };
int round[2] = { round_ptr[0], round_ptr[1] };
@@ -904,6 +972,14 @@ void vp10_highbd_quantize_b_c(
for (i = (int)n_coeffs - 1; i >= 0; i--) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
uint32_t abs_qcoeff = 0;
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
non_zero_count--;
@@ -918,14 +994,25 @@ void vp10_highbd_quantize_b_c(
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
#if CONFIG_AOM_QM
if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
#else
if (abs_coeff >= zbins[rc != 0]) {
#endif
const int64_t tmp1 = abs_coeff + round[rc != 0];
const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
#if CONFIG_AOM_QM
const uint32_t abs_qcoeff = (uint32_t)(
(tmp2 * wt * quant_shift_ptr[rc != 0]) >> (AOM_QM_BITS + shift));
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / scale;
#else
const uint32_t abs_qcoeff =
(uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> shift);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / scale;
#endif // CONFIG_AOM_QM
if (abs_qcoeff) eob = i;
}
}
@@ -1064,6 +1151,14 @@ void vp10_init_plane_quantizers(const VP10_COMP *cpi, MACROBLOCK *x,
const int qindex = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex);
const int rdmult = vp10_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
int i;
#if CONFIG_AOM_QM
int minqm = cm->min_qmlevel;
int maxqm = cm->max_qmlevel;
// Quant matrix only depends on the base QP so there is only one set per frame
int qmlevel = (lossless || cm->using_qmatrix == 0)
? NUM_QM_LEVELS - 1
: aom_get_qmlevel(cm->base_qindex, minqm, maxqm);
#endif
#if CONFIG_NEW_QUANT
int dq;
#endif
@@ -1075,6 +1170,12 @@ void vp10_init_plane_quantizers(const VP10_COMP *cpi, MACROBLOCK *x,
x->plane[0].quant_shift = quants->y_quant_shift[qindex];
x->plane[0].zbin = quants->y_zbin[qindex];
x->plane[0].round = quants->y_round[qindex];
#if CONFIG_AOM_QM
memcpy(&xd->plane[0].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][0],
sizeof(cm->gqmatrix[qmlevel][0]));
memcpy(&xd->plane[0].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][0],
sizeof(cm->giqmatrix[qmlevel][0]));
#endif
xd->plane[0].dequant = cpi->y_dequant[qindex];
#if CONFIG_NEW_QUANT
for (dq = 0; dq < QUANT_PROFILES; dq++) {
@@ -1094,6 +1195,12 @@ void vp10_init_plane_quantizers(const VP10_COMP *cpi, MACROBLOCK *x,
x->plane[i].quant_shift = quants->uv_quant_shift[qindex];
x->plane[i].zbin = quants->uv_zbin[qindex];
x->plane[i].round = quants->uv_round[qindex];
#if CONFIG_AOM_QM
memcpy(&xd->plane[i].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][1],
sizeof(cm->gqmatrix[qmlevel][1]));
memcpy(&xd->plane[i].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][1],
sizeof(cm->giqmatrix[qmlevel][1]));
#endif
xd->plane[i].dequant = cpi->uv_dequant[qindex];
#if CONFIG_NEW_QUANT
for (dq = 0; dq < QUANT_PROFILES; dq++) {

View File

@@ -12,6 +12,7 @@
#define VP10_ENCODER_QUANTIZE_H_
#include "./vpx_config.h"
#include "vp10/common/quant_common.h"
#include "vp10/common/scan.h"
#include "vp10/encoder/block.h"

View File

@@ -619,7 +619,12 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
vp10_find_best_sub_pixel_tree_pruned_evenmore;
}
#if !CONFIG_AOM_QM
x->optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
#else
// FIXME: trellis not very efficient for quantisation matrices
x->optimize = 0;
#endif
x->min_partition_size = sf->default_min_partition_size;
x->max_partition_size = sf->default_max_partition_size;

View File

@@ -89,6 +89,8 @@ ifeq (yes,$(filter yes,$(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION)))
VP10_COMMON_SRCS-yes += common/warped_motion.h
VP10_COMMON_SRCS-yes += common/warped_motion.c
endif
VP10_COMMON_SRCS-yes += common/clpf.c
VP10_COMMON_SRCS-yes += common/clpf.h
ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP10_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans4_dspr2.c

View File

@@ -43,6 +43,11 @@ struct vp10_extracfg {
unsigned int rc_max_inter_bitrate_pct;
unsigned int gf_cbr_boost_pct;
unsigned int lossless;
#if CONFIG_AOM_QM
unsigned int enable_qm;
unsigned int qm_min;
unsigned int qm_max;
#endif
unsigned int frame_parallel_decoding_mode;
AQ_MODE aq_mode;
unsigned int frame_periodic_boost;
@@ -70,17 +75,22 @@ static struct vp10_extracfg default_extra_cfg = {
#else
0, // tile_columns
0, // tile_rows
#endif // CONFIG_EXT_TILE
7, // arnr_max_frames
5, // arnr_strength
0, // min_gf_interval; 0 -> default decision
0, // max_gf_interval; 0 -> default decision
VPX_TUNE_PSNR, // tuning
10, // cq_level
0, // rc_max_intra_bitrate_pct
0, // rc_max_inter_bitrate_pct
0, // gf_cbr_boost_pct
0, // lossless
#endif // CONFIG_EXT_TILE
7, // arnr_max_frames
5, // arnr_strength
0, // min_gf_interval; 0 -> default decision
0, // max_gf_interval; 0 -> default decision
VPX_TUNE_PSNR, // tuning
10, // cq_level
0, // rc_max_intra_bitrate_pct
0, // rc_max_inter_bitrate_pct
0, // gf_cbr_boost_pct
0, // lossless
#if CONFIG_AOM_QM
0, // enable_qm
DEFAULT_QM_FIRST, // qm_min
DEFAULT_QM_LAST, // qm_max
#endif
1, // frame_parallel_decoding_mode
NO_AQ, // aq_mode
0, // frame_periodic_delta_q
@@ -378,6 +388,12 @@ static vpx_codec_err_t set_encoder_config(
oxcf->cq_level = vp10_quantizer_to_qindex(extra_cfg->cq_level);
oxcf->fixed_q = -1;
#if CONFIG_AOM_QM
oxcf->using_qm = extra_cfg->enable_qm;
oxcf->qm_minlevel = extra_cfg->qm_min;
oxcf->qm_maxlevel = extra_cfg->qm_max;
#endif
oxcf->under_shoot_pct = cfg->rc_undershoot_pct;
oxcf->over_shoot_pct = cfg->rc_overshoot_pct;
@@ -682,6 +698,29 @@ static vpx_codec_err_t ctrl_set_lossless(vpx_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
#if CONFIG_AOM_QM
static vpx_codec_err_t ctrl_set_enable_qm(vpx_codec_alg_priv_t *ctx,
va_list args) {
struct vp10_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.enable_qm = CAST(VP9E_SET_ENABLE_QM, args);
return update_extra_cfg(ctx, &extra_cfg);
}
static vpx_codec_err_t ctrl_set_qm_min(vpx_codec_alg_priv_t *ctx,
va_list args) {
struct vp10_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.qm_min = CAST(VP9E_SET_QM_MIN, args);
return update_extra_cfg(ctx, &extra_cfg);
}
static vpx_codec_err_t ctrl_set_qm_max(vpx_codec_alg_priv_t *ctx,
va_list args) {
struct vp10_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.qm_max = CAST(VP9E_SET_QM_MAX, args);
return update_extra_cfg(ctx, &extra_cfg);
}
#endif
static vpx_codec_err_t ctrl_set_frame_parallel_decoding_mode(
vpx_codec_alg_priv_t *ctx, va_list args) {
struct vp10_extracfg extra_cfg = ctx->extra_cfg;
@@ -1280,6 +1319,11 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP9E_SET_MAX_INTER_BITRATE_PCT, ctrl_set_rc_max_inter_bitrate_pct },
{ VP9E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct },
{ VP9E_SET_LOSSLESS, ctrl_set_lossless },
#if CONFIG_AOM_QM
{ VP9E_SET_ENABLE_QM, ctrl_set_enable_qm },
{ VP9E_SET_QM_MIN, ctrl_set_qm_min },
{ VP9E_SET_QM_MAX, ctrl_set_qm_max },
#endif
{ VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_frame_parallel_decoding_mode },
{ VP9E_SET_AQ_MODE, ctrl_set_aq_mode },
{ VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost },

View File

@@ -314,6 +314,48 @@ enum vp8e_enc_control_id {
* Supported in codecs: VP9
*/
VP9E_SET_LOSSLESS,
#if CONFIG_AOM_QM
/*!\brief Codec control function to encode with quantisation matrices.
*
* AOM can operate with default quantisation matrices dependent on
* quantisation level and block type.
* 0 = do not use quantisation matrices
* 1 = use quantisation matrices
*
* By default, the encoder operates without quantisation matrices.
*
* Supported in codecs: AOM
*/
VP9E_SET_ENABLE_QM,
/*!\brief Codec control function to set the min quant matrix flatness.
*
* AOM can operate with different ranges of quantisation matrices.
* As quantisation levels increase, the matrices get flatter. This
* control sets the minimum level of flatness from which the matrices
* are determined.
*
* By default, the encoder sets this minimum at half the available
* range.
*
* Supported in codecs: AOM
*/
VP9E_SET_QM_MIN,
/*!\brief Codec control function to set the max quant matrix flatness.
*
* AOM can operate with different ranges of quantisation matrices.
* As quantisation levels increase, the matrices get flatter. This
* control sets the maximum level of flatness possible.
*
* By default, the encoder sets this maximum at the top of the
* available range.
*
* Supported in codecs: AOM
*/
VP9E_SET_QM_MAX,
#endif
/*!\brief Codec control function to set number of tile columns.
*
@@ -651,6 +693,17 @@ VPX_CTRL_USE_TYPE(VP9E_SET_GF_CBR_BOOST_PCT, unsigned int)
VPX_CTRL_USE_TYPE(VP9E_SET_LOSSLESS, unsigned int)
#define VPX_CTRL_VP9E_SET_LOSSLESS
#if CONFIG_AOM_QM
VPX_CTRL_USE_TYPE(VP9E_SET_ENABLE_QM, unsigned int)
#define VPX_CTRL_VP9E_SET_ENABLE_QM
VPX_CTRL_USE_TYPE(VP9E_SET_QM_MIN, unsigned int)
#define VPX_CTRL_VP9E_SET_QM_MIN
VPX_CTRL_USE_TYPE(VP9E_SET_QM_MAX, unsigned int)
#define VPX_CTRL_VP9E_SET_QM_MAX
#endif
VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int)
#define VPX_CTRL_VP9E_SET_FRAME_PARALLEL_DECODING

View File

@@ -12,6 +12,371 @@
#include "vpx_dsp/quantize.h"
#include "vpx_mem/vpx_mem.h"
#if CONFIG_AOM_QM
void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
const int rc = 0;
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int64_t tmp, eob = -1;
int32_t tmp32;
int dequant =
(dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (16 + AOM_QM_BITS));
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
if (tmp32) eob = 0;
}
*eob_ptr = eob + 1;
}
#if CONFIG_VPX_HIGHBITDEPTH
void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t quant, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
uint16_t *eob_ptr, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr) {
int eob = -1;
int dequant =
(dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
const int coeff = coeff_ptr[0];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + round_ptr[0];
const uint32_t abs_qcoeff =
(uint32_t)((tmp * qm_ptr[0] * quant) >> (16 + AOM_QM_BITS));
qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant;
if (abs_qcoeff) eob = 0;
}
*eob_ptr = eob + 1;
}
#endif
void vpx_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
const int n_coeffs = 1024;
const int rc = 0;
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int64_t tmp, eob = -1;
int32_t tmp32;
int dequant;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
INT16_MIN, INT16_MAX);
tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (15 + AOM_QM_BITS));
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dequant =
(dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
if (tmp32) eob = 0;
}
*eob_ptr = eob + 1;
}
#if CONFIG_VPX_HIGHBITDEPTH
void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr,
const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr) {
const int n_coeffs = 1024;
int eob = -1;
int dequant;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
const int coeff = coeff_ptr[0];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
const uint32_t abs_qcoeff =
(uint32_t)((tmp * qm_ptr[0] * quant) >> (15 + AOM_QM_BITS));
qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dequant =
(dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / 2;
if (abs_qcoeff) eob = 0;
}
*eob_ptr = eob + 1;
}
#endif
void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr) {
int i, non_zero_count = (int)n_coeffs, eob = -1;
const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] };
const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
(void)iscan;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
// Pre-scan pass
for (i = (int)n_coeffs - 1; i >= 0; i--) {
const int rc = scan[i];
const qm_val_t wt = qm_ptr[rc];
const int coeff = coeff_ptr[rc] * wt;
if (coeff < (zbins[rc != 0] << AOM_QM_BITS) &&
coeff > (nzbins[rc != 0] << AOM_QM_BITS))
non_zero_count--;
else
break;
}
// Quantization pass: All coefficients with index >= zero_flag are
// skippable. Note: zero_flag can be zero.
for (i = 0; i < non_zero_count; i++) {
const int rc = scan[i];
const qm_val_t wt = qm_ptr[rc];
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int dequant;
if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
int32_t tmp32;
int64_t tmp =
clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = tmp * wt;
tmp32 = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
quant_shift_ptr[rc != 0]) >>
(16 + AOM_QM_BITS); // quantization
dequant =
(dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
if (tmp32) eob = i;
}
}
}
*eob_ptr = eob + 1;
}
#if CONFIG_VPX_HIGHBITDEPTH
void vpx_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
int i, non_zero_count = (int)n_coeffs, eob = -1;
const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] };
const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
int dequant;
(void)iscan;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
// Pre-scan pass
for (i = (int)n_coeffs - 1; i >= 0; i--) {
const int rc = scan[i];
const qm_val_t wt = qm_ptr[rc];
const int coeff = coeff_ptr[rc] * wt;
if (coeff < (zbins[rc != 0] << AOM_QM_BITS) &&
coeff > (nzbins[rc != 0] << AOM_QM_BITS))
non_zero_count--;
else
break;
}
// Quantization pass: All coefficients with index >= zero_flag are
// skippable. Note: zero_flag can be zero.
for (i = 0; i < non_zero_count; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
const qm_val_t wt = qm_ptr[rc];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
const int64_t tmp1 = abs_coeff + round_ptr[rc != 0];
const int64_t tmpw = tmp1 * wt;
const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
const uint32_t abs_qcoeff =
(uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (16 + AOM_QM_BITS));
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dequant =
(dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
if (abs_qcoeff) eob = i;
}
}
}
*eob_ptr = eob + 1;
}
#endif
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
int idx = 0;
int idx_arr[1024];
int i, eob = -1;
int dequant;
(void)iscan;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
// Pre-scan pass
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const qm_val_t wt = qm_ptr[rc];
const int coeff = coeff_ptr[rc] * wt;
// If the coefficient is out of the base ZBIN range, keep it for
// quantization.
if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) ||
coeff <= (nzbins[rc != 0] << AOM_QM_BITS))
idx_arr[idx++] = i;
}
// Quantization pass: only process the coefficients selected in
// pre-scan pass. Note: idx can be zero.
for (i = 0; i < idx; i++) {
const int rc = scan[idx_arr[i]];
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const qm_val_t wt = qm_ptr[rc];
int64_t tmp;
int tmp32;
int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
tmp = tmp * wt;
tmp32 = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
quant_shift_ptr[rc != 0]) >>
(15 + AOM_QM_BITS);
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dequant =
(dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
if (tmp32) eob = idx_arr[i];
}
}
*eob_ptr = eob + 1;
}
#if CONFIG_VPX_HIGHBITDEPTH
void vpx_highbd_quantize_b_32x32_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr) {
const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
int idx = 0;
int idx_arr[1024];
int i, eob = -1;
int dequant;
(void)iscan;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
// Pre-scan pass
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const qm_val_t wt = qm_ptr[rc];
const int coeff = coeff_ptr[rc] * wt;
// If the coefficient is out of the base ZBIN range, keep it for
// quantization.
if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) ||
coeff <= (nzbins[rc != 0] << AOM_QM_BITS))
idx_arr[idx++] = i;
}
// Quantization pass: only process the coefficients selected in
// pre-scan pass. Note: idx can be zero.
for (i = 0; i < idx; i++) {
const int rc = scan[idx_arr[i]];
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const qm_val_t wt = qm_ptr[rc];
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp1 =
abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
const int64_t tmpw = tmp1 * wt;
const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
const uint32_t abs_qcoeff =
(uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (15 + AOM_QM_BITS));
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dequant =
(dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
if (abs_qcoeff) eob = idx_arr[i];
}
}
*eob_ptr = eob + 1;
}
#endif
#else
void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
@@ -318,3 +683,4 @@ void vpx_highbd_quantize_b_32x32_c(
*eob_ptr = eob + 1;
}
#endif
#endif

View File

@@ -18,6 +18,47 @@
extern "C" {
#endif
#if CONFIG_AOM_QM
void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
void vpx_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr);
#if CONFIG_VP9_HIGHBITDEPTH
void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
uint16_t *eob_ptr, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr);
void vpx_highbd_quantize_dc_32x32(
const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr,
const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr);
void vpx_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
#endif
#else
void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
@@ -40,6 +81,7 @@ void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr);
#endif
#endif
#ifdef __cplusplus
} // extern "C"

View File

@@ -53,6 +53,10 @@ extern "C" {
a = c; \
} while (0)
#if CONFIG_AOM_QM
typedef uint16_t qm_val_t;
#define AOM_QM_BITS 6
#endif
#if CONFIG_VP9_HIGHBITDEPTH
// Note:
// tran_low_t is the datatype used for final transform coefficients.

View File

@@ -920,22 +920,35 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# Quantization
#
if ((vpx_config("CONFIG_VP10_ENCODER") eq "yes")) {
add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
if (vpx_config("CONFIG_AOM_QM") eq "yes") {
if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b sse2/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP10_ENCODER
add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
} # CONFIG_VPX_HIGHBITDEPTH
} # CONFIG_VP10_ENCODER
} else {
if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b sse2/;
add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP10_ENCODER
} # CONFIG_AOM_QM
if (vpx_config("CONFIG_VP10") eq "yes") {
#
# Alpha blending with mask

View File

@@ -366,6 +366,15 @@ static const arg_def_t tile_rows =
"Number of tile rows to use, log2 (set to 0 while threads > 1)");
static const arg_def_t lossless =
ARG_DEF(NULL, "lossless", 1, "Lossless mode (0: false (default), 1: true)");
#if CONFIG_AOM_QM
static const arg_def_t enable_qm =
ARG_DEF(NULL, "enable_qm", 1,
"Enable quantisation matrices (0: false (default), 1: true)");
static const arg_def_t qm_min = ARG_DEF(
NULL, "qm_min", 1, "Min quant matrix flatness (0..15), default is 8");
static const arg_def_t qm_max = ARG_DEF(
NULL, "qm_max", 1, "Max quant matrix flatness (0..15), default is 16");
#endif
static const arg_def_t frame_parallel_decoding = ARG_DEF(
NULL, "frame-parallel", 1, "Enable frame parallel decodability features");
static const arg_def_t aq_mode = ARG_DEF(