2010-05-18 17:58:33 +02:00
|
|
|
/*
|
2010-09-09 14:16:39 +02:00
|
|
|
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
2010-05-18 17:58:33 +02:00
|
|
|
*
|
2010-06-18 18:39:21 +02:00
|
|
|
* Use of this source code is governed by a BSD-style license
|
2010-06-04 22:19:40 +02:00
|
|
|
* that can be found in the LICENSE file in the root of the source
|
|
|
|
* tree. An additional intellectual property rights grant can be found
|
2010-06-18 18:39:21 +02:00
|
|
|
* in the file PATENTS. All contributing project authors may
|
2010-06-04 22:19:40 +02:00
|
|
|
* be found in the AUTHORS file in the root of the source tree.
|
2010-05-18 17:58:33 +02:00
|
|
|
*/
|
|
|
|
|
2013-10-02 23:13:33 +02:00
|
|
|
#include "./vp9_rtcd.h"
|
2012-12-23 16:20:10 +01:00
|
|
|
#include "./vpx_config.h"
|
2015-07-06 18:33:27 +02:00
|
|
|
#include "./vpx_dsp_rtcd.h"
|
2013-10-02 23:13:33 +02:00
|
|
|
|
2015-07-17 21:05:42 +02:00
|
|
|
#include "vpx_dsp/quantize.h"
|
2013-10-02 23:13:33 +02:00
|
|
|
#include "vpx_mem/vpx_mem.h"
|
2015-05-12 04:09:22 +02:00
|
|
|
#include "vpx_ports/mem.h"
|
2013-10-02 23:13:33 +02:00
|
|
|
|
|
|
|
#include "vp9/common/vp9_idct.h"
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_reconinter.h"
|
|
|
|
#include "vp9/common/vp9_reconintra.h"
|
2015-05-22 20:19:51 +02:00
|
|
|
#include "vp9/common/vp9_scan.h"
|
2013-10-02 23:13:33 +02:00
|
|
|
|
|
|
|
#include "vp9/encoder/vp9_encodemb.h"
|
2014-07-02 21:36:48 +02:00
|
|
|
#include "vp9/encoder/vp9_rd.h"
|
2013-10-02 23:13:33 +02:00
|
|
|
#include "vp9/encoder/vp9_tokenize.h"
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2014-02-09 04:30:45 +01:00
|
|
|
struct optimize_ctx {
|
|
|
|
ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
|
|
|
|
ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
|
|
|
|
};
|
|
|
|
|
2014-02-17 13:57:40 +01:00
|
|
|
// Fills p->src_diff for one plane of the current block by handing the source
// buffer (p->src) and the destination/prediction buffer (pd->dst) to the
// subtract-block kernel.
//
//   x     - macroblock state; supplies both the source and the dst buffers.
//   bsize - block size at luma resolution; converted to this plane's size via
//           get_plane_block_size().
//   plane - index of the plane to process.
//
// The residual is stored densely: its stride equals the block width 'bw'.
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  // The lookup tables are in units of 4 samples; convert to sample counts.
  const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  // High-bitdepth frames use the dedicated kernel, which also needs the bit
  // depth.
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
                              p->src.stride, pd->dst.buf, pd->dst.stride,
                              x->e_mbd.bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                     pd->dst.buf, pd->dst.stride);
}
|
|
|
|
|
2017-06-08 23:51:01 +02:00
|
|
|
// Scaling factors applied to the RD multiplier in vp9_optimize_b().
// Indexed as plane_rd_mult[ref][plane_type], where 'ref' is the value returned
// by is_inter_block() for the current block. The selected entry is multiplied
// into mb->rdmult and the product is then halved (>> 1).
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
  { 10, 6 },
  { 8, 5 },
};
|
|
|
|
|
|
|
|
// Right-shifts 'num' by 'shift' bits. For a negative 'num' the magnitude is
// shifted and the sign re-applied, so the result rounds toward zero instead of
// relying on the implementation-defined behavior of shifting a negative value.
// 'num' can be negative, but 'shift' must be non-negative.
// The whole expansion is parenthesized so the macro can be used safely as an
// operand of a larger expression (e.g. 2 * RIGHT_SHIFT_POSSIBLY_NEGATIVE(..)).
// Note: 'num' is evaluated more than once; do not pass an expression with
// side effects.
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
  (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
|
|
|
|
|
|
|
|
// Greedy rate-distortion refinement of the already-quantized coefficients of
// one transform block.
//
// Coefficients are visited in scan order. For every non-zero quantized value
// two candidates are compared - keeping the value, or reducing its magnitude
// by one - and the cheaper one (rate of this token plus its effect on the next
// token, against the resulting distortion) is kept. In parallel the function
// tracks which position would be the cheapest place to end the block.
//
//   mb      - macroblock state; coeff / qcoeff / eobs and token costs are read
//             from it.
//   plane   - plane index of the block.
//   block   - block offset used to locate the coefficient buffers.
//   tx_size - transform size of the block.
//   ctx     - entropy context used for the first coefficient.
//
// Side effects: qcoeff and dqcoeff for this block are updated in place, every
// coefficient at or after the chosen end-of-block position is zeroed, and
// mb->plane[plane].eobs[block] is overwritten.
// Returns the new end-of-block position.
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
                   int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(xd->mi[0]);
  uint8_t token_cache[1024];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = get_plane_type(plane);
  // Upper bound on the number of coefficients for this transform size.
  const int default_eob = 16 << (tx_size << 1);
  // 32x32 blocks carry one extra bit of scaling.
  const int shift = (tx_size == TX_32X32);
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const scan_order *const so = get_scan(xd, tx_size, plane_type, block);
  const int16_t *const scan = so->scan;
  const int16_t *const nb = so->neighbors;
  const int64_t rdmult =
      ((int64_t)mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int64_t rate0, rate1;
  int16_t t0, t1;
  int i, final_eob;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[tx_size][plane_type][ref];
  unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
  int64_t eob_cost0, eob_cost1;
  const int ctx0 = ctx;
  int64_t accu_rate = 0;
  // Initialized to the worst possible error for the largest transform size.
  // This ensures that it never goes negative.
  int64_t accu_error = ((int64_t)1) << 50;
  int64_t best_block_rd_cost = INT64_MAX;
  // Quantized value of the previously visited coefficient; a zero selects the
  // second token-cost tree for the current coefficient.
  int x_prev = 1;
  // Quantized / de-quantized value to store at the last position before the
  // best end-of-block found so far.
  tran_low_t before_best_eob_qc = 0;
  tran_low_t before_best_eob_dqc = 0;

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  // Seed the token cache with the energy class of every current coefficient
  // so that get_coef_context() can be evaluated for any position.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
  }
  final_eob = 0;

  // Initial RD cost.
  token_costs_cur = token_costs + band_translate[0];
  rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN];
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  // For each token, pick one of two choices greedily:
  // (i) First candidate: Keep current quantized value, OR
  // (ii) Second candidate: Reduce quantized value by 1.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    const int x = qcoeff[rc];
    const int band_cur = band_translate[i];
    const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
    const int token_tree_sel_cur = (x_prev == 0);
    token_costs_cur = token_costs + band_cur;
    if (x == 0) {  // No need to search
      const int token = vp9_get_token(x);
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
      accu_rate += rate0;
      x_prev = 0;
      // Note: accu_error does not change.
    } else {
      // dequant[0] is used for the first raster position, dequant[1] for all
      // others.
      const int dqv = dequant_ptr[rc != 0];
      // Compute the distortion for quantizing to 0.
      const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
      const int diff_for_zero =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
              :
#endif
              diff_for_zero_raw;
      const int64_t distortion_for_zero =
          (int64_t)diff_for_zero * diff_for_zero;

      // Compute the distortion for the first candidate
      const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
      const int diff0 =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
              :
#endif  // CONFIG_VP9_HIGHBITDEPTH
              diff0_raw;
      const int64_t distortion0 = (int64_t)diff0 * diff0;

      // Compute the distortion for the second candidate
      const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
      const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
      int64_t distortion1;
      if (x1 != 0) {
        const int dqv_step =
#if CONFIG_VP9_HIGHBITDEPTH
            (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8)
                                                          :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                          dqv;
        // Applies the sign of x to dqv_step: yields -dqv_step when sign is -1
        // and dqv_step when sign is 0.
        const int diff_step = (dqv_step + sign) ^ sign;
        const int diff1 = diff0 - diff_step;
        assert(dqv > 0);  // We aren't right shifting a negative number above.
        distortion1 = (int64_t)diff1 * diff1;
      } else {
        distortion1 = distortion_for_zero;
      }
      {
        // Calculate RDCost for current coeff for the two candidates.
        const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
        const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
        rate0 =
            base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
        rate1 =
            base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];
      }
      {
        int rdcost_better_for_x1, eob_rdcost_better_for_x1;
        int dqc0, dqc1;
        int64_t best_eob_cost_cur;
        int use_x1;

        // Calculate RD Cost effect on the next coeff for the two candidates.
        int64_t next_bits0 = 0;
        int64_t next_bits1 = 0;
        int64_t next_eob_bits0 = 0;
        int64_t next_eob_bits1 = 0;
        if (i < default_eob - 1) {
          int ctx_next, token_tree_sel_next;
          const int band_next = band_translate[i + 1];
          const int token_next =
              (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
          unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
                                               [ENTROPY_TOKENS] =
                                                   token_costs + band_next;
          // Temporarily record candidate 0 in the cache to derive the context
          // the next coefficient would see.
          token_cache[rc] = vp9_pt_energy_class[t0];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x == 0);
          next_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          next_eob_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          // Same again for candidate 1.
          token_cache[rc] = vp9_pt_energy_class[t1];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x1 == 0);
          next_bits1 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          if (x1 != 0) {
            next_eob_bits1 =
                (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          }
        }

        // Compare the total RD costs for two candidates.
        rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
        rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
        rdcost_better_for_x1 = (rd_cost1 < rd_cost0);
        // Cost of the whole block if it were to end right after this
        // coefficient, for each candidate.
        eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                           (accu_error + distortion0 - distortion_for_zero));
        eob_cost1 = eob_cost0;
        if (x1 != 0) {
          eob_cost1 =
              RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                     (accu_error + distortion1 - distortion_for_zero));
          eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
        } else {
          // A zero cannot be the last coefficient before end-of-block.
          eob_rdcost_better_for_x1 = 0;
        }

        // Calculate the two candidate de-quantized values.
        dqc0 = dqcoeff[rc];
        dqc1 = 0;
        if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) {
          if (x1 != 0) {
            dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
          } else {
            dqc1 = 0;
          }
        }

        // Pick and record the better quantized and de-quantized values.
        if (rdcost_better_for_x1) {
          qcoeff[rc] = x1;
          dqcoeff[rc] = dqc1;
          accu_rate += rate1;
          accu_error += distortion1 - distortion_for_zero;
          assert(distortion1 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t1];
        } else {
          accu_rate += rate0;
          accu_error += distortion0 - distortion_for_zero;
          assert(distortion0 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t0];
        }
        assert(accu_error >= 0);
        x_prev = qcoeff[rc];  // Update based on selected quantized value.

        use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
        best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;

        // Determine whether to move the eob position to i+1
        if (best_eob_cost_cur < best_block_rd_cost) {
          best_block_rd_cost = best_eob_cost_cur;
          final_eob = i + 1;
          if (use_x1) {
            before_best_eob_qc = x1;
            before_best_eob_dqc = dqc1;
          } else {
            before_best_eob_qc = x;
            before_best_eob_dqc = dqc0;
          }
        }
      }
    }
  }
  assert(final_eob <= eob);
  if (final_eob > 0) {
    // The value chosen for the end-of-block decision may differ from the one
    // chosen greedily for this position; the end-of-block choice wins.
    int rc;
    assert(before_best_eob_qc != 0);
    i = final_eob - 1;
    rc = scan[i];
    qcoeff[rc] = before_best_eob_qc;
    dqcoeff[rc] = before_best_eob_dqc;
  }
  // Clear everything after the new end-of-block position.
  for (i = final_eob; i < eob; i++) {
    int rc = scan[i];
    qcoeff[rc] = 0;
    dqcoeff[rc] = 0;
  }
  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}
|
|
|
|
// The helper macro is only used by vp9_optimize_b() above; undefine it so it
// is not visible to the rest of the file.
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
|
|
|
|
|
2016-07-27 05:43:23 +02:00
|
|
|
static INLINE void fdct32x32(int rd_transform, const int16_t *src,
|
|
|
|
tran_low_t *dst, int src_stride) {
|
2014-02-17 12:34:02 +01:00
|
|
|
if (rd_transform)
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_fdct32x32_rd(src, dst, src_stride);
|
2014-02-17 12:34:02 +01:00
|
|
|
else
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_fdct32x32(src, dst, src_stride);
|
2014-02-17 12:34:02 +01:00
|
|
|
}
|
|
|
|
|
2014-09-03 01:34:09 +02:00
|
|
|
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth counterpart of fdct32x32(): runs a 32x32 forward DCT on 'src'
// (row stride 'src_stride') into 'dst'. A non-zero 'rd_transform' selects
// vpx_highbd_fdct32x32_rd(); otherwise vpx_highbd_fdct32x32() is used.
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (rd_transform) {
    vpx_highbd_fdct32x32_rd(src, dst, src_stride);
  } else {
    vpx_highbd_fdct32x32(src, dst, src_stride);
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
|
2014-09-03 01:34:09 +02:00
|
|
|
|
2016-07-01 21:20:45 +02:00
|
|
|
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
|
2014-05-30 03:14:17 +02:00
|
|
|
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
|
|
|
|
MACROBLOCKD *const xd = &x->e_mbd;
|
|
|
|
const struct macroblock_plane *const p = &x->plane[plane];
|
|
|
|
const struct macroblockd_plane *const pd = &xd->plane[plane];
|
2014-07-02 01:10:44 +02:00
|
|
|
const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
|
2014-09-03 01:34:09 +02:00
|
|
|
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
|
|
|
|
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
|
|
|
|
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
2014-07-02 01:10:44 +02:00
|
|
|
uint16_t *const eob = &p->eobs[block];
|
|
|
|
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
|
|
|
|
const int16_t *src_diff;
|
2016-07-01 21:20:45 +02:00
|
|
|
src_diff = &p->src_diff[4 * (row * diff_stride + col)];
|
2017-08-16 22:34:14 +02:00
|
|
|
// skip block condition should be handled before this is called.
|
|
|
|
assert(!x->skip_block);
|
2014-07-02 01:10:44 +02:00
|
|
|
|
2014-09-24 15:36:34 +02:00
|
|
|
#if CONFIG_VP9_HIGHBITDEPTH
|
|
|
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
|
|
|
switch (tx_size) {
|
|
|
|
case TX_32X32:
|
2014-10-08 21:43:22 +02:00
|
|
|
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
2017-02-14 01:29:49 +01:00
|
|
|
vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
|
|
|
|
p->quant_fp, qcoeff, dqcoeff, pd->dequant,
|
|
|
|
eob, scan_order->scan, scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_16X16:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
|
2017-02-14 01:29:49 +01:00
|
|
|
vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->round_fp,
|
|
|
|
p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
|
|
|
|
scan_order->scan, scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_8X8:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
|
2017-02-14 01:29:49 +01:00
|
|
|
vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->round_fp,
|
|
|
|
p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
|
|
|
|
scan_order->scan, scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_4X4:
|
2017-06-30 00:07:55 +02:00
|
|
|
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
|
2017-02-14 01:29:49 +01:00
|
|
|
vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->round_fp,
|
|
|
|
p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
|
|
|
|
scan_order->scan, scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
2016-07-27 05:43:23 +02:00
|
|
|
default: assert(0);
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|
|
|
2014-07-02 01:10:44 +02:00
|
|
|
switch (tx_size) {
|
|
|
|
case TX_32X32:
|
|
|
|
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
2017-02-14 01:29:49 +01:00
|
|
|
vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
|
|
|
|
p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
|
|
|
|
scan_order->scan, scan_order->iscan);
|
2014-07-02 01:10:44 +02:00
|
|
|
break;
|
|
|
|
case TX_16X16:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_fdct16x16(src_diff, coeff, diff_stride);
|
2017-02-14 01:29:49 +01:00
|
|
|
vp9_quantize_fp(coeff, 256, x->skip_block, p->round_fp, p->quant_fp,
|
|
|
|
qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
|
|
|
|
scan_order->iscan);
|
2014-07-02 01:10:44 +02:00
|
|
|
break;
|
|
|
|
case TX_8X8:
|
2016-07-27 05:43:23 +02:00
|
|
|
vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block,
|
2017-02-14 01:29:49 +01:00
|
|
|
p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant,
|
|
|
|
eob, scan_order->scan, scan_order->iscan);
|
2014-07-02 01:10:44 +02:00
|
|
|
break;
|
|
|
|
case TX_4X4:
|
2017-06-30 00:07:55 +02:00
|
|
|
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
|
2017-02-14 01:29:49 +01:00
|
|
|
vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp,
|
|
|
|
qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
|
|
|
|
scan_order->iscan);
|
2014-08-04 17:52:53 +02:00
|
|
|
break;
|
2016-07-27 05:43:23 +02:00
|
|
|
default: assert(0); break;
|
2014-07-02 01:10:44 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-01 21:20:45 +02:00
|
|
|
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
|
2014-07-02 01:10:44 +02:00
|
|
|
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
|
|
|
|
MACROBLOCKD *const xd = &x->e_mbd;
|
|
|
|
const struct macroblock_plane *const p = &x->plane[plane];
|
|
|
|
const struct macroblockd_plane *const pd = &xd->plane[plane];
|
2014-09-03 01:34:09 +02:00
|
|
|
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
|
|
|
|
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
|
|
|
|
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
2014-05-30 03:14:17 +02:00
|
|
|
uint16_t *const eob = &p->eobs[block];
|
|
|
|
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
|
|
|
|
const int16_t *src_diff;
|
2016-07-01 21:20:45 +02:00
|
|
|
src_diff = &p->src_diff[4 * (row * diff_stride + col)];
|
2017-08-16 22:34:14 +02:00
|
|
|
// skip block condition should be handled before this is called.
|
|
|
|
assert(!x->skip_block);
|
|
|
|
|
2014-09-24 15:36:34 +02:00
|
|
|
#if CONFIG_VP9_HIGHBITDEPTH
|
|
|
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
|
|
|
switch (tx_size) {
|
|
|
|
case TX_32X32:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
|
2014-10-08 21:43:22 +02:00
|
|
|
p->quant_fp[0], qcoeff, dqcoeff,
|
|
|
|
pd->dequant[0], eob);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_16X16:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
|
2016-07-27 05:43:23 +02:00
|
|
|
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
|
|
|
|
eob);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_8X8:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
|
2016-07-27 05:43:23 +02:00
|
|
|
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
|
|
|
|
eob);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_4X4:
|
2017-06-30 00:07:55 +02:00
|
|
|
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
|
2016-07-27 05:43:23 +02:00
|
|
|
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
|
|
|
|
eob);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
2016-07-27 05:43:23 +02:00
|
|
|
default: assert(0);
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|
|
|
2014-05-30 03:14:17 +02:00
|
|
|
switch (tx_size) {
|
|
|
|
case TX_32X32:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_fdct32x32_1(src_diff, coeff, diff_stride);
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0],
|
|
|
|
qcoeff, dqcoeff, pd->dequant[0], eob);
|
2014-05-30 03:14:17 +02:00
|
|
|
break;
|
|
|
|
case TX_16X16:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_fdct16x16_1(src_diff, coeff, diff_stride);
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0],
|
|
|
|
qcoeff, dqcoeff, pd->dequant[0], eob);
|
2014-05-30 03:14:17 +02:00
|
|
|
break;
|
|
|
|
case TX_8X8:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_fdct8x8_1(src_diff, coeff, diff_stride);
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0],
|
|
|
|
qcoeff, dqcoeff, pd->dequant[0], eob);
|
2014-05-30 03:14:17 +02:00
|
|
|
break;
|
|
|
|
case TX_4X4:
|
2017-06-30 00:07:55 +02:00
|
|
|
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0],
|
|
|
|
qcoeff, dqcoeff, pd->dequant[0], eob);
|
2014-08-04 17:52:53 +02:00
|
|
|
break;
|
2016-07-27 05:43:23 +02:00
|
|
|
default: assert(0); break;
|
2014-05-30 03:14:17 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-01 21:20:45 +02:00
|
|
|
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
|
2014-02-09 04:30:45 +01:00
|
|
|
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
|
|
|
|
MACROBLOCKD *const xd = &x->e_mbd;
|
2014-02-27 21:02:09 +01:00
|
|
|
const struct macroblock_plane *const p = &x->plane[plane];
|
|
|
|
const struct macroblockd_plane *const pd = &xd->plane[plane];
|
|
|
|
const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
|
2014-09-03 01:34:09 +02:00
|
|
|
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
|
|
|
|
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
|
|
|
|
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
2014-02-27 21:02:09 +01:00
|
|
|
uint16_t *const eob = &p->eobs[block];
|
2013-11-22 20:45:30 +01:00
|
|
|
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
|
2014-02-27 21:02:09 +01:00
|
|
|
const int16_t *src_diff;
|
2016-07-01 21:20:45 +02:00
|
|
|
src_diff = &p->src_diff[4 * (row * diff_stride + col)];
|
2017-08-16 22:34:14 +02:00
|
|
|
// skip block condition should be handled before this is called.
|
|
|
|
assert(!x->skip_block);
|
2013-07-11 22:01:44 +02:00
|
|
|
|
2014-09-24 15:36:34 +02:00
|
|
|
#if CONFIG_VP9_HIGHBITDEPTH
|
|
|
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
2016-07-27 05:43:23 +02:00
|
|
|
switch (tx_size) {
|
2014-09-24 15:36:34 +02:00
|
|
|
case TX_32X32:
|
2014-10-08 21:43:22 +02:00
|
|
|
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
|
2014-10-08 21:43:22 +02:00
|
|
|
p->round, p->quant, p->quant_shift, qcoeff,
|
2016-07-27 05:43:23 +02:00
|
|
|
dqcoeff, pd->dequant, eob, scan_order->scan,
|
|
|
|
scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_16X16:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
|
2014-10-08 21:43:22 +02:00
|
|
|
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
2016-07-27 05:43:23 +02:00
|
|
|
pd->dequant, eob, scan_order->scan,
|
|
|
|
scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_8X8:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
|
2014-10-08 21:43:22 +02:00
|
|
|
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
2016-07-27 05:43:23 +02:00
|
|
|
pd->dequant, eob, scan_order->scan,
|
|
|
|
scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_4X4:
|
2017-06-30 00:07:55 +02:00
|
|
|
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
|
2014-10-08 21:43:22 +02:00
|
|
|
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
2016-07-27 05:43:23 +02:00
|
|
|
pd->dequant, eob, scan_order->scan,
|
|
|
|
scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
2016-07-27 05:43:23 +02:00
|
|
|
default: assert(0);
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|
|
|
2013-07-11 22:01:44 +02:00
|
|
|
switch (tx_size) {
|
2013-04-30 20:29:27 +02:00
|
|
|
case TX_32X32:
|
2014-02-17 12:34:02 +01:00
|
|
|
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
|
2013-07-11 22:01:44 +02:00
|
|
|
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
2014-12-22 18:35:29 +01:00
|
|
|
pd->dequant, eob, scan_order->scan,
|
2013-12-13 20:05:26 +01:00
|
|
|
scan_order->iscan);
|
2013-04-30 20:29:27 +02:00
|
|
|
break;
|
|
|
|
case TX_16X16:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_fdct16x16(src_diff, coeff, diff_stride);
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
|
|
|
|
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
|
2013-12-13 20:05:26 +01:00
|
|
|
scan_order->scan, scan_order->iscan);
|
2013-04-30 20:29:27 +02:00
|
|
|
break;
|
|
|
|
case TX_8X8:
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_fdct8x8(src_diff, coeff, diff_stride);
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
|
|
|
|
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
|
2013-12-13 20:05:26 +01:00
|
|
|
scan_order->scan, scan_order->iscan);
|
2013-04-30 20:29:27 +02:00
|
|
|
break;
|
|
|
|
case TX_4X4:
|
2017-06-30 00:07:55 +02:00
|
|
|
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
|
|
|
|
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
|
2013-12-13 20:05:26 +01:00
|
|
|
scan_order->scan, scan_order->iscan);
|
2013-04-30 20:29:27 +02:00
|
|
|
break;
|
2016-07-27 05:43:23 +02:00
|
|
|
default: assert(0); break;
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2013-05-03 02:05:14 +02:00
|
|
|
}
|
|
|
|
|
2016-07-01 21:20:45 +02:00
|
|
|
static void encode_block(int plane, int block, int row, int col,
|
2016-07-27 05:43:23 +02:00
|
|
|
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
|
2013-05-31 21:30:32 +02:00
|
|
|
struct encode_b_args *const args = arg;
|
|
|
|
MACROBLOCK *const x = args->x;
|
|
|
|
MACROBLOCKD *const xd = &x->e_mbd;
|
2013-12-04 02:59:32 +01:00
|
|
|
struct macroblock_plane *const p = &x->plane[plane];
|
2013-05-31 21:30:32 +02:00
|
|
|
struct macroblockd_plane *const pd = &xd->plane[plane];
|
2014-09-03 01:34:09 +02:00
|
|
|
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
2013-11-14 04:33:12 +01:00
|
|
|
uint8_t *dst;
|
2014-03-06 21:47:55 +01:00
|
|
|
ENTROPY_CONTEXT *a, *l;
|
2016-07-01 21:20:45 +02:00
|
|
|
dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
|
2016-07-06 22:56:02 +02:00
|
|
|
a = &args->ta[col];
|
|
|
|
l = &args->tl[row];
|
2013-09-21 01:29:24 +02:00
|
|
|
|
|
|
|
// TODO(jingning): per transformed block zero forcing only enabled for
|
|
|
|
// luma component. will integrate chroma components as well.
|
|
|
|
if (x->zcoeff_blk[tx_size][block] && plane == 0) {
|
2013-12-04 02:59:32 +01:00
|
|
|
p->eobs[block] = 0;
|
2014-03-06 21:47:55 +01:00
|
|
|
*a = *l = 0;
|
2013-09-21 01:29:24 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-08-07 01:11:22 +02:00
|
|
|
if (!x->skip_recode) {
|
Rework forward txfm/quantization skip system in RTC coding mode
This commit allows more aggressive decision to skip forward
transform and quantization for luma component in RTC coding mode.
The chroma components remains going through the normal coding
routine, since they are not included in the non-RD mode search
process.
It reduces the runtime cost by 2% - 10%. In speed -6,
vidyo1 1000 kbps
16576 b/f, 40.281 dB, 8402 ms -> 16576 b/f, 40.323 dB, 7764 ms
nik720p 1000 kbps
33337 b/f, 38.622 dB, 7473 ms -> 33299 b/f, 38.660 dB, 7314 ms
dark720p 1000 kbps
33330 b/f, 39.785 dB, 13505 ms -> 33325 b/f, 39.714 dB, 13105 ms
The compression performance of speed -6 is improved by 0.44% in
PSNR and 1.31% in SSIM.
Change-Id: Iae9e3738de6255babea734e5897f29118bebc6d7
2014-11-21 21:18:53 +01:00
|
|
|
if (x->quant_fp) {
|
|
|
|
// Encoding process for rtc mode
|
2015-07-30 20:52:28 +02:00
|
|
|
if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
|
2014-08-28 18:09:37 +02:00
|
|
|
// skip forward transform
|
|
|
|
p->eobs[block] = 0;
|
|
|
|
*a = *l = 0;
|
|
|
|
return;
|
Rework forward txfm/quantization skip system in RTC coding mode
This commit allows more aggressive decision to skip forward
transform and quantization for luma component in RTC coding mode.
The chroma components remains going through the normal coding
routine, since they are not included in the non-RD mode search
process.
It reduces the runtime cost by 2% - 10%. In speed -6,
vidyo1 1000 kbps
16576 b/f, 40.281 dB, 8402 ms -> 16576 b/f, 40.323 dB, 7764 ms
nik720p 1000 kbps
33337 b/f, 38.622 dB, 7473 ms -> 33299 b/f, 38.660 dB, 7314 ms
dark720p 1000 kbps
33330 b/f, 39.785 dB, 13505 ms -> 33325 b/f, 39.714 dB, 13105 ms
The compression performance of speed -6 is improved by 0.44% in
PSNR and 1.31% in SSIM.
Change-Id: Iae9e3738de6255babea734e5897f29118bebc6d7
2014-11-21 21:18:53 +01:00
|
|
|
} else {
|
2016-07-01 21:20:45 +02:00
|
|
|
vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
|
2014-08-28 18:09:37 +02:00
|
|
|
}
|
2014-08-07 01:11:22 +02:00
|
|
|
} else {
|
Rework forward txfm/quantization skip system in RTC coding mode
This commit allows more aggressive decision to skip forward
transform and quantization for luma component in RTC coding mode.
The chroma components remains going through the normal coding
routine, since they are not included in the non-RD mode search
process.
It reduces the runtime cost by 2% - 10%. In speed -6,
vidyo1 1000 kbps
16576 b/f, 40.281 dB, 8402 ms -> 16576 b/f, 40.323 dB, 7764 ms
nik720p 1000 kbps
33337 b/f, 38.622 dB, 7473 ms -> 33299 b/f, 38.660 dB, 7314 ms
dark720p 1000 kbps
33330 b/f, 39.785 dB, 13505 ms -> 33325 b/f, 39.714 dB, 13105 ms
The compression performance of speed -6 is improved by 0.44% in
PSNR and 1.31% in SSIM.
Change-Id: Iae9e3738de6255babea734e5897f29118bebc6d7
2014-11-21 21:18:53 +01:00
|
|
|
if (max_txsize_lookup[plane_bsize] == tx_size) {
|
|
|
|
int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
|
2015-07-30 20:52:28 +02:00
|
|
|
if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
|
Rework forward txfm/quantization skip system in RTC coding mode
This commit allows more aggressive decision to skip forward
transform and quantization for luma component in RTC coding mode.
The chroma components remains going through the normal coding
routine, since they are not included in the non-RD mode search
process.
It reduces the runtime cost by 2% - 10%. In speed -6,
vidyo1 1000 kbps
16576 b/f, 40.281 dB, 8402 ms -> 16576 b/f, 40.323 dB, 7764 ms
nik720p 1000 kbps
33337 b/f, 38.622 dB, 7473 ms -> 33299 b/f, 38.660 dB, 7314 ms
dark720p 1000 kbps
33330 b/f, 39.785 dB, 13505 ms -> 33325 b/f, 39.714 dB, 13105 ms
The compression performance of speed -6 is improved by 0.44% in
PSNR and 1.31% in SSIM.
Change-Id: Iae9e3738de6255babea734e5897f29118bebc6d7
2014-11-21 21:18:53 +01:00
|
|
|
// full forward transform and quantization
|
2016-07-01 21:20:45 +02:00
|
|
|
vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
|
2015-07-30 20:52:28 +02:00
|
|
|
} else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
|
Rework forward txfm/quantization skip system in RTC coding mode
This commit allows more aggressive decision to skip forward
transform and quantization for luma component in RTC coding mode.
The chroma components remains going through the normal coding
routine, since they are not included in the non-RD mode search
process.
It reduces the runtime cost by 2% - 10%. In speed -6,
vidyo1 1000 kbps
16576 b/f, 40.281 dB, 8402 ms -> 16576 b/f, 40.323 dB, 7764 ms
nik720p 1000 kbps
33337 b/f, 38.622 dB, 7473 ms -> 33299 b/f, 38.660 dB, 7314 ms
dark720p 1000 kbps
33330 b/f, 39.785 dB, 13505 ms -> 33325 b/f, 39.714 dB, 13105 ms
The compression performance of speed -6 is improved by 0.44% in
PSNR and 1.31% in SSIM.
Change-Id: Iae9e3738de6255babea734e5897f29118bebc6d7
2014-11-21 21:18:53 +01:00
|
|
|
// fast path forward transform and quantization
|
2016-07-01 21:20:45 +02:00
|
|
|
vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
|
Rework forward txfm/quantization skip system in RTC coding mode
This commit allows more aggressive decision to skip forward
transform and quantization for luma component in RTC coding mode.
The chroma components remains going through the normal coding
routine, since they are not included in the non-RD mode search
process.
It reduces the runtime cost by 2% - 10%. In speed -6,
vidyo1 1000 kbps
16576 b/f, 40.281 dB, 8402 ms -> 16576 b/f, 40.323 dB, 7764 ms
nik720p 1000 kbps
33337 b/f, 38.622 dB, 7473 ms -> 33299 b/f, 38.660 dB, 7314 ms
dark720p 1000 kbps
33330 b/f, 39.785 dB, 13505 ms -> 33325 b/f, 39.714 dB, 13105 ms
The compression performance of speed -6 is improved by 0.44% in
PSNR and 1.31% in SSIM.
Change-Id: Iae9e3738de6255babea734e5897f29118bebc6d7
2014-11-21 21:18:53 +01:00
|
|
|
} else {
|
|
|
|
// skip forward transform
|
|
|
|
p->eobs[block] = 0;
|
|
|
|
*a = *l = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
} else {
|
2016-07-01 21:20:45 +02:00
|
|
|
vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
|
Rework forward txfm/quantization skip system in RTC coding mode
This commit allows more aggressive decision to skip forward
transform and quantization for luma component in RTC coding mode.
The chroma components remains going through the normal coding
routine, since they are not included in the non-RD mode search
process.
It reduces the runtime cost by 2% - 10%. In speed -6,
vidyo1 1000 kbps
16576 b/f, 40.281 dB, 8402 ms -> 16576 b/f, 40.323 dB, 7764 ms
nik720p 1000 kbps
33337 b/f, 38.622 dB, 7473 ms -> 33299 b/f, 38.660 dB, 7314 ms
dark720p 1000 kbps
33330 b/f, 39.785 dB, 13505 ms -> 33325 b/f, 39.714 dB, 13105 ms
The compression performance of speed -6 is improved by 0.44% in
PSNR and 1.31% in SSIM.
Change-Id: Iae9e3738de6255babea734e5897f29118bebc6d7
2014-11-21 21:18:53 +01:00
|
|
|
}
|
2014-07-02 01:10:44 +02:00
|
|
|
}
|
2014-05-30 03:14:17 +02:00
|
|
|
}
|
2013-05-03 02:05:14 +02:00
|
|
|
|
2013-11-07 23:56:58 +01:00
|
|
|
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
|
2014-04-10 20:19:38 +02:00
|
|
|
const int ctx = combine_entropy_contexts(*a, *l);
|
2016-07-06 19:05:51 +02:00
|
|
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
|
2013-11-06 06:07:08 +01:00
|
|
|
} else {
|
2014-03-06 21:47:55 +01:00
|
|
|
*a = *l = p->eobs[block] > 0;
|
2013-11-06 06:07:08 +01:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2016-07-27 05:43:23 +02:00
|
|
|
if (p->eobs[block]) *(args->skip) = 0;
|
2014-01-15 02:58:25 +01:00
|
|
|
|
2016-07-27 05:43:23 +02:00
|
|
|
if (x->skip_encode || p->eobs[block] == 0) return;
|
2014-09-24 15:36:34 +02:00
|
|
|
#if CONFIG_VP9_HIGHBITDEPTH
|
|
|
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
2017-05-03 22:32:08 +02:00
|
|
|
uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
|
2014-09-24 15:36:34 +02:00
|
|
|
switch (tx_size) {
|
|
|
|
case TX_32X32:
|
2017-05-02 19:44:12 +02:00
|
|
|
vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
|
2016-07-27 05:43:23 +02:00
|
|
|
xd->bd);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_16X16:
|
2017-05-02 19:44:12 +02:00
|
|
|
vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
|
2016-07-27 05:43:23 +02:00
|
|
|
xd->bd);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_8X8:
|
2017-05-02 19:44:12 +02:00
|
|
|
vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
|
2016-07-27 05:43:23 +02:00
|
|
|
xd->bd);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
|
|
|
case TX_4X4:
|
|
|
|
// this is like vp9_short_idct4x4 but has a special case around eob<=1
|
|
|
|
// which is significant (not just an optimization) for the lossless
|
|
|
|
// case.
|
2017-07-01 00:29:46 +02:00
|
|
|
x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
|
|
|
|
xd->bd);
|
2014-09-24 15:36:34 +02:00
|
|
|
break;
|
2016-07-27 05:43:23 +02:00
|
|
|
default: assert(0 && "Invalid transform size");
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
2013-07-15 20:28:46 +02:00
|
|
|
|
2013-08-14 20:39:31 +02:00
|
|
|
switch (tx_size) {
|
2013-04-30 20:29:27 +02:00
|
|
|
case TX_32X32:
|
2013-12-04 02:59:32 +01:00
|
|
|
vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
|
2013-04-30 20:29:27 +02:00
|
|
|
break;
|
|
|
|
case TX_16X16:
|
2013-12-04 02:59:32 +01:00
|
|
|
vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
|
2013-04-30 20:29:27 +02:00
|
|
|
break;
|
|
|
|
case TX_8X8:
|
2013-12-04 02:59:32 +01:00
|
|
|
vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
|
2013-04-30 20:29:27 +02:00
|
|
|
break;
|
|
|
|
case TX_4X4:
|
2013-07-11 18:09:41 +02:00
|
|
|
// this is like vp9_short_idct4x4 but has a special case around eob<=1
|
|
|
|
// which is significant (not just an optimization) for the lossless
|
|
|
|
// case.
|
2017-07-01 00:29:46 +02:00
|
|
|
x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
|
2013-04-30 20:29:27 +02:00
|
|
|
break;
|
2016-07-27 05:43:23 +02:00
|
|
|
default: assert(0 && "Invalid transform size"); break;
|
2013-04-30 20:29:27 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2014-03-15 01:05:35 +01:00
|
|
|
|
2016-07-01 21:20:45 +02:00
|
|
|
static void encode_block_pass1(int plane, int block, int row, int col,
|
2016-07-27 05:43:23 +02:00
|
|
|
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
|
|
|
|
void *arg) {
|
2014-02-26 22:51:12 +01:00
|
|
|
MACROBLOCK *const x = (MACROBLOCK *)arg;
|
2013-10-23 20:09:27 +02:00
|
|
|
MACROBLOCKD *const xd = &x->e_mbd;
|
2013-12-04 02:59:32 +01:00
|
|
|
struct macroblock_plane *const p = &x->plane[plane];
|
2013-10-23 20:09:27 +02:00
|
|
|
struct macroblockd_plane *const pd = &xd->plane[plane];
|
2014-09-03 01:34:09 +02:00
|
|
|
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
2013-11-14 22:45:51 +01:00
|
|
|
uint8_t *dst;
|
2016-07-01 21:20:45 +02:00
|
|
|
dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
|
2013-10-23 20:09:27 +02:00
|
|
|
|
2016-07-01 21:20:45 +02:00
|
|
|
vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
|
2013-10-23 20:09:27 +02:00
|
|
|
|
2014-09-24 15:36:34 +02:00
|
|
|
if (p->eobs[block] > 0) {
|
|
|
|
#if CONFIG_VP9_HIGHBITDEPTH
|
|
|
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
2017-07-01 00:29:46 +02:00
|
|
|
x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
|
|
|
|
p->eobs[block], xd->bd);
|
2016-07-27 05:43:23 +02:00
|
|
|
return;
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
|
|
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
2017-07-01 00:29:46 +02:00
|
|
|
x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
2013-10-23 20:09:27 +02:00
|
|
|
}
|
|
|
|
|
2014-02-26 22:51:12 +01:00
|
|
|
// Encodes plane 0 of a block of size bsize: computes the plane's residual
// with vp9_subtract_plane(), then runs encode_block_pass1() on every
// transform block of that plane.
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  const int plane = 0;
  const MACROBLOCKD *const xd = &x->e_mbd;

  vp9_subtract_plane(x, bsize, plane);
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_pass1,
                                         x);
}
|
|
|
|
|
2013-08-27 20:05:08 +02:00
|
|
|
// Encodes all MAX_MB_PLANE planes of a block by running encode_block() on
// each transform block.
//
// The mode info's skip flag is set to 1 before anything else; encode_block()
// receives a pointer to it through the argument struct. When x->skip is set
// the function returns right after setting the flag, without touching any
// plane.
//
// For each plane:
//   - the residual is recomputed unless x->skip_recode is set;
//   - when x->optimize is on and recode/optimize are not both skipped, the
//     plane's entropy contexts are loaded and coefficient optimization is
//     enabled for the callback; otherwise it is disabled.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mode_info = xd->mi[0];
  struct optimize_ctx entropy;
  struct encode_b_args args = { x, 1, NULL, NULL, &mode_info->skip };
  int plane;

  mode_info->skip = 1;
  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    int optimize_coeffs = 0;

    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);

    if (x->optimize && !(x->skip_recode && x->skip_optimize)) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      TX_SIZE tx_size;

      // Non-zero planes use the uv transform size.
      if (plane) {
        tx_size = get_uv_tx_size(mode_info, pd);
      } else {
        tx_size = mode_info->tx_size;
      }
      vp9_get_entropy_contexts(bsize, tx_size, pd, entropy.ta[plane],
                               entropy.tl[plane]);
      optimize_coeffs = 1;
    }

    args.enable_coeff_opt = optimize_coeffs;
    args.ta = entropy.ta[plane];
    args.tl = entropy.tl[plane];

    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &args);
  }
}
|
2013-05-16 02:21:15 +02:00
|
|
|
|
2016-07-01 21:20:45 +02:00
|
|
|
void vp9_encode_block_intra(int plane, int block, int row, int col,
|
2016-07-27 05:43:23 +02:00
|
|
|
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
|
|
|
|
void *arg) {
|
|
|
|
struct encode_b_args *const args = arg;
|
2013-05-31 21:30:32 +02:00
|
|
|
MACROBLOCK *const x = args->x;
|
|
|
|
MACROBLOCKD *const xd = &x->e_mbd;
|
2016-01-20 01:40:20 +01:00
|
|
|
MODE_INFO *mi = xd->mi[0];
|
2013-05-31 21:30:32 +02:00
|
|
|
struct macroblock_plane *const p = &x->plane[plane];
|
|
|
|
struct macroblockd_plane *const pd = &xd->plane[plane];
|
2014-09-03 01:34:09 +02:00
|
|
|
tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
|
|
|
|
tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
|
|
|
|
tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
2013-12-13 20:05:26 +01:00
|
|
|
const scan_order *scan_order;
|
2015-07-17 02:37:16 +02:00
|
|
|
TX_TYPE tx_type = DCT_DCT;
|
2014-04-12 00:26:24 +02:00
|
|
|
PREDICTION_MODE mode;
|
2014-10-07 21:30:33 +02:00
|
|
|
const int bwl = b_width_log2_lookup[plane_bsize];
|
2013-11-20 22:58:21 +01:00
|
|
|
const int diff_stride = 4 * (1 << bwl);
|
2013-07-11 20:35:13 +02:00
|
|
|
uint8_t *src, *dst;
|
|
|
|
int16_t *src_diff;
|
2013-12-04 02:59:32 +01:00
|
|
|
uint16_t *eob = &p->eobs[block];
|
2014-02-17 12:34:02 +01:00
|
|
|
const int src_stride = p->src.stride;
|
|
|
|
const int dst_stride = pd->dst.stride;
|
2016-04-25 21:40:56 +02:00
|
|
|
ENTROPY_CONTEXT *a = NULL;
|
|
|
|
ENTROPY_CONTEXT *l = NULL;
|
|
|
|
int entropy_ctx = 0;
|
2016-07-01 21:20:45 +02:00
|
|
|
dst = &pd->dst.buf[4 * (row * dst_stride + col)];
|
|
|
|
src = &p->src.buf[4 * (row * src_stride + col)];
|
|
|
|
src_diff = &p->src_diff[4 * (row * diff_stride + col)];
|
2016-07-06 22:56:02 +02:00
|
|
|
if (args->enable_coeff_opt) {
|
|
|
|
a = &args->ta[col];
|
|
|
|
l = &args->tl[row];
|
2016-04-25 21:40:56 +02:00
|
|
|
entropy_ctx = combine_entropy_contexts(*a, *l);
|
|
|
|
}
|
2013-05-16 02:21:15 +02:00
|
|
|
|
2015-07-17 02:37:16 +02:00
|
|
|
if (tx_size == TX_4X4) {
|
2015-09-29 19:40:27 +02:00
|
|
|
tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
|
2015-07-17 02:37:16 +02:00
|
|
|
scan_order = &vp9_scan_orders[TX_4X4][tx_type];
|
2016-01-20 01:40:20 +01:00
|
|
|
mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
|
2015-07-17 02:37:16 +02:00
|
|
|
} else {
|
2016-01-20 01:40:20 +01:00
|
|
|
mode = plane == 0 ? mi->mode : mi->uv_mode;
|
2015-07-17 02:37:16 +02:00
|
|
|
if (tx_size == TX_32X32) {
|
|
|
|
scan_order = &vp9_default_scan_orders[TX_32X32];
|
|
|
|
} else {
|
2015-09-29 19:40:27 +02:00
|
|
|
tx_type = get_tx_type(get_plane_type(plane), xd);
|
2015-07-17 02:37:16 +02:00
|
|
|
scan_order = &vp9_scan_orders[tx_size][tx_type];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-27 14:15:43 +01:00
|
|
|
vp9_predict_intra_block(
|
|
|
|
xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
|
|
|
|
(x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
|
|
|
|
dst_stride, col, row, plane);
|
2015-07-17 02:37:16 +02:00
|
|
|
|
2017-08-16 22:34:14 +02:00
|
|
|
// skip block condition should be handled before this is called.
|
|
|
|
assert(!x->skip_block);
|
|
|
|
|
2014-09-24 15:36:34 +02:00
|
|
|
#if CONFIG_VP9_HIGHBITDEPTH
|
|
|
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
2017-05-03 22:32:08 +02:00
|
|
|
uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
|
2014-09-24 15:36:34 +02:00
|
|
|
switch (tx_size) {
|
|
|
|
case TX_32X32:
|
|
|
|
if (!x->skip_recode) {
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
|
|
|
|
src_stride, dst, dst_stride, xd->bd);
|
2014-10-08 21:43:22 +02:00
|
|
|
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
|
2014-10-08 21:43:22 +02:00
|
|
|
p->round, p->quant, p->quant_shift,
|
2014-12-22 18:35:29 +01:00
|
|
|
qcoeff, dqcoeff, pd->dequant, eob,
|
2014-10-08 21:43:22 +02:00
|
|
|
scan_order->scan, scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
2017-04-26 12:40:58 +02:00
|
|
|
if (args->enable_coeff_opt && !x->skip_recode) {
|
|
|
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
|
|
|
}
|
2014-09-24 15:36:34 +02:00
|
|
|
if (!x->skip_encode && *eob) {
|
2017-05-02 19:44:12 +02:00
|
|
|
vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TX_16X16:
|
|
|
|
if (!x->skip_recode) {
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src,
|
|
|
|
src_stride, dst, dst_stride, xd->bd);
|
2015-07-20 19:26:04 +02:00
|
|
|
if (tx_type == DCT_DCT)
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
|
2015-07-20 19:26:04 +02:00
|
|
|
else
|
|
|
|
vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
|
2014-10-08 21:43:22 +02:00
|
|
|
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
2016-07-27 05:43:23 +02:00
|
|
|
pd->dequant, eob, scan_order->scan,
|
|
|
|
scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
2017-04-26 12:40:58 +02:00
|
|
|
if (args->enable_coeff_opt && !x->skip_recode) {
|
|
|
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
|
|
|
}
|
2014-09-24 15:36:34 +02:00
|
|
|
if (!x->skip_encode && *eob) {
|
2017-05-02 19:44:12 +02:00
|
|
|
vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
|
2016-07-27 05:43:23 +02:00
|
|
|
xd->bd);
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TX_8X8:
|
|
|
|
if (!x->skip_recode) {
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src,
|
|
|
|
src_stride, dst, dst_stride, xd->bd);
|
2015-07-20 19:26:04 +02:00
|
|
|
if (tx_type == DCT_DCT)
|
2015-07-29 00:57:40 +02:00
|
|
|
vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
|
2015-07-20 19:26:04 +02:00
|
|
|
else
|
|
|
|
vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
|
2014-10-08 21:43:22 +02:00
|
|
|
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
2016-07-27 05:43:23 +02:00
|
|
|
pd->dequant, eob, scan_order->scan,
|
|
|
|
scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
2017-04-26 12:40:58 +02:00
|
|
|
if (args->enable_coeff_opt && !x->skip_recode) {
|
|
|
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
|
|
|
}
|
2014-09-24 15:36:34 +02:00
|
|
|
if (!x->skip_encode && *eob) {
|
2017-05-02 19:44:12 +02:00
|
|
|
vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
|
2014-10-08 21:43:22 +02:00
|
|
|
xd->bd);
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TX_4X4:
|
|
|
|
if (!x->skip_recode) {
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
|
|
|
|
src_stride, dst, dst_stride, xd->bd);
|
2014-09-24 15:36:34 +02:00
|
|
|
if (tx_type != DCT_DCT)
|
2014-10-08 21:43:22 +02:00
|
|
|
vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
|
2014-09-24 15:36:34 +02:00
|
|
|
else
|
2017-06-30 00:07:55 +02:00
|
|
|
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
|
2014-10-08 21:43:22 +02:00
|
|
|
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
2016-07-27 05:43:23 +02:00
|
|
|
pd->dequant, eob, scan_order->scan,
|
|
|
|
scan_order->iscan);
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
2017-04-26 12:40:58 +02:00
|
|
|
if (args->enable_coeff_opt && !x->skip_recode) {
|
|
|
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
|
|
|
}
|
2014-09-24 15:36:34 +02:00
|
|
|
if (!x->skip_encode && *eob) {
|
2014-10-08 21:43:22 +02:00
|
|
|
if (tx_type == DCT_DCT) {
|
2014-09-24 15:36:34 +02:00
|
|
|
// this is like vp9_short_idct4x4 but has a special case around
|
|
|
|
// eob<=1 which is significant (not just an optimization) for the
|
|
|
|
// lossless case.
|
2017-07-01 00:29:46 +02:00
|
|
|
x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
|
2014-10-08 21:43:22 +02:00
|
|
|
} else {
|
2017-05-02 19:44:12 +02:00
|
|
|
vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
|
|
|
|
xd->bd);
|
2014-10-08 21:43:22 +02:00
|
|
|
}
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
|
|
|
break;
|
2016-07-27 05:43:23 +02:00
|
|
|
default: assert(0); return;
|
2014-09-24 15:36:34 +02:00
|
|
|
}
|
2016-07-27 05:43:23 +02:00
|
|
|
if (*eob) *(args->skip) = 0;
|
2014-09-24 15:36:34 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|
|
|
2013-07-11 20:35:13 +02:00
|
|
|
switch (tx_size) {
|
2013-05-16 02:21:15 +02:00
|
|
|
case TX_32X32:
|
2013-11-07 23:56:58 +01:00
|
|
|
if (!x->skip_recode) {
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst,
|
|
|
|
dst_stride);
|
2014-02-17 12:34:02 +01:00
|
|
|
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
|
2013-11-07 23:56:58 +01:00
|
|
|
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
2014-12-22 18:35:29 +01:00
|
|
|
pd->dequant, eob, scan_order->scan,
|
2013-12-13 20:05:26 +01:00
|
|
|
scan_order->iscan);
|
2013-11-07 23:56:58 +01:00
|
|
|
}
|
2016-07-06 22:56:02 +02:00
|
|
|
if (args->enable_coeff_opt && !x->skip_recode) {
|
2016-07-27 05:43:23 +02:00
|
|
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
2016-04-25 21:40:56 +02:00
|
|
|
}
|
2013-07-23 19:02:43 +02:00
|
|
|
if (!x->skip_encode && *eob)
|
2014-02-17 12:34:02 +01:00
|
|
|
vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
|
2013-05-16 02:21:15 +02:00
|
|
|
break;
|
|
|
|
case TX_16X16:
|
2013-11-07 23:56:58 +01:00
|
|
|
if (!x->skip_recode) {
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst,
|
|
|
|
dst_stride);
|
2014-02-06 20:54:15 +01:00
|
|
|
vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
|
|
|
|
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
|
|
|
|
scan_order->scan, scan_order->iscan);
|
2013-11-07 23:56:58 +01:00
|
|
|
}
|
2016-07-06 22:56:02 +02:00
|
|
|
if (args->enable_coeff_opt && !x->skip_recode) {
|
2016-07-06 19:05:51 +02:00
|
|
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
2016-04-25 21:40:56 +02:00
|
|
|
}
|
2013-10-02 23:13:33 +02:00
|
|
|
if (!x->skip_encode && *eob)
|
2014-02-17 12:34:02 +01:00
|
|
|
vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
|
2013-05-16 02:21:15 +02:00
|
|
|
break;
|
|
|
|
case TX_8X8:
|
2013-11-07 23:56:58 +01:00
|
|
|
if (!x->skip_recode) {
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst,
|
|
|
|
dst_stride);
|
2014-02-06 20:54:15 +01:00
|
|
|
vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
|
2016-07-27 05:43:23 +02:00
|
|
|
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
|
|
|
|
scan_order->scan, scan_order->iscan);
|
2013-11-07 23:56:58 +01:00
|
|
|
}
|
2016-07-06 22:56:02 +02:00
|
|
|
if (args->enable_coeff_opt && !x->skip_recode) {
|
2016-07-06 19:05:51 +02:00
|
|
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
2016-04-25 21:40:56 +02:00
|
|
|
}
|
2013-10-02 23:13:33 +02:00
|
|
|
if (!x->skip_encode && *eob)
|
2014-02-17 12:34:02 +01:00
|
|
|
vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
|
2013-05-16 02:21:15 +02:00
|
|
|
break;
|
|
|
|
case TX_4X4:
|
2013-11-07 23:56:58 +01:00
|
|
|
if (!x->skip_recode) {
|
2016-07-27 05:43:23 +02:00
|
|
|
vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
|
|
|
|
dst_stride);
|
2013-11-07 23:56:58 +01:00
|
|
|
if (tx_type != DCT_DCT)
|
2014-02-06 20:54:15 +01:00
|
|
|
vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
|
2013-11-07 23:56:58 +01:00
|
|
|
else
|
2017-06-30 00:07:55 +02:00
|
|
|
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
|
2015-08-04 18:24:52 +02:00
|
|
|
vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
|
2016-07-27 05:43:23 +02:00
|
|
|
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
|
|
|
|
scan_order->scan, scan_order->iscan);
|
2013-11-07 23:56:58 +01:00
|
|
|
}
|
2016-07-06 22:56:02 +02:00
|
|
|
if (args->enable_coeff_opt && !x->skip_recode) {
|
2016-07-06 19:05:51 +02:00
|
|
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
2016-04-25 21:40:56 +02:00
|
|
|
}
|
2013-07-23 19:02:43 +02:00
|
|
|
if (!x->skip_encode && *eob) {
|
2013-07-11 20:35:13 +02:00
|
|
|
if (tx_type == DCT_DCT)
|
|
|
|
// this is like vp9_short_idct4x4 but has a special case around eob<=1
|
|
|
|
// which is significant (not just an optimization) for the lossless
|
|
|
|
// case.
|
2017-07-01 00:29:46 +02:00
|
|
|
x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
|
2013-07-11 20:35:13 +02:00
|
|
|
else
|
2014-02-17 12:34:02 +01:00
|
|
|
vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
|
2013-07-11 20:35:13 +02:00
|
|
|
}
|
2013-05-16 02:21:15 +02:00
|
|
|
break;
|
2016-07-27 05:43:23 +02:00
|
|
|
default: assert(0); break;
|
2013-05-16 02:21:15 +02:00
|
|
|
}
|
2016-07-27 05:43:23 +02:00
|
|
|
if (*eob) *(args->skip) = 0;
|
2013-05-16 02:21:15 +02:00
|
|
|
}
|
|
|
|
|
2016-04-25 21:40:56 +02:00
|
|
|
// Runs vp9_encode_block_intra() on every transform block of one plane.
//
// Coefficient optimization is passed on to the callback only when the caller
// requests it (enable_optimize_b), x->optimize is set, and recode/optimize
// are not both being skipped. In that case the plane's entropy contexts are
// loaded first; otherwise the callback's enable_coeff_opt flag is forced
// to 0. The callback also receives a pointer to the mode info's skip flag.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_optimize_b) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mode_info = xd->mi[0];
  const int optimize_coeffs = enable_optimize_b && x->optimize &&
                              !(x->skip_recode && x->skip_optimize);
  struct optimize_ctx entropy;
  struct encode_b_args args = { x, enable_optimize_b, entropy.ta[plane],
                                entropy.tl[plane], &mode_info->skip };

  if (!optimize_coeffs) {
    args.enable_coeff_opt = 0;
  } else {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    TX_SIZE tx_size;

    // Non-zero planes use the uv transform size.
    if (plane) {
      tx_size = get_uv_tx_size(mode_info, pd);
    } else {
      tx_size = mode_info->tx_size;
    }
    vp9_get_entropy_contexts(bsize, tx_size, pd, entropy.ta[plane],
                             entropy.tl[plane]);
  }

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &args);
}
|