2010-05-18 17:58:33 +02:00
|
|
|
/*
|
2010-09-09 14:16:39 +02:00
|
|
|
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
2010-05-18 17:58:33 +02:00
|
|
|
*
|
2010-06-09 17:29:20 +02:00
|
|
|
* Use of this source code is governed by a BSD-style license
|
2010-06-04 22:19:40 +02:00
|
|
|
* that can be found in the LICENSE file in the root of the source
|
|
|
|
* tree. An additional intellectual property rights grant can be found
|
2010-06-09 17:29:20 +02:00
|
|
|
* in the file PATENTS. All contributing project authors may
|
2010-06-04 22:19:40 +02:00
|
|
|
* be found in the AUTHORS file in the root of the source tree.
|
2010-05-18 17:58:33 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/decoder/vp9_onyxd_int.h"
|
2013-01-06 03:20:25 +01:00
|
|
|
#include "vp9/common/vp9_common.h"
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_header.h"
|
|
|
|
#include "vp9/common/vp9_reconintra.h"
|
|
|
|
#include "vp9/common/vp9_reconinter.h"
|
2012-11-29 00:15:51 +01:00
|
|
|
#include "vp9/common/vp9_entropy.h"
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/decoder/vp9_decodframe.h"
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/decoder/vp9_detokenize.h"
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_invtrans.h"
|
|
|
|
#include "vp9/common/vp9_alloccommon.h"
|
|
|
|
#include "vp9/common/vp9_entropymode.h"
|
|
|
|
#include "vp9/common/vp9_quant_common.h"
|
2012-12-03 23:19:49 +01:00
|
|
|
#include "vpx_scale/vpx_scale.h"
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_setupintrarecon.h"
|
2010-09-09 20:42:48 +02:00
|
|
|
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/decoder/vp9_decodemv.h"
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_extend.h"
|
|
|
|
#include "vp9/common/vp9_modecont.h"
|
2010-05-18 17:58:33 +02:00
|
|
|
#include "vpx_mem/vpx_mem.h"
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/decoder/vp9_dboolhuff.h"
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_seg_common.h"
|
2013-02-07 00:30:21 +01:00
|
|
|
#include "vp9/common/vp9_tile_common.h"
|
2012-11-09 02:09:30 +01:00
|
|
|
#include "vp9_rtcd.h"
|
2011-10-05 12:26:00 +02:00
|
|
|
|
2010-05-18 17:58:33 +02:00
|
|
|
#include <assert.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
|
2012-06-06 00:25:07 +02:00
|
|
|
#define COEFCOUNT_TESTING
|
|
|
|
|
2013-02-20 19:16:24 +01:00
|
|
|
// #define DEC_DEBUG
|
2012-11-16 00:14:38 +01:00
|
|
|
#ifdef DEC_DEBUG
|
|
|
|
int dec_debug = 0;
|
|
|
|
#endif
|
|
|
|
|
2013-03-14 20:31:54 +01:00
|
|
|
|
|
|
|
/* Assembles a 16-bit little-endian value from the two bytes at p:
 * p[0] is the low byte and p[1] the high byte. The result is in 0..65535.
 */
static int read_le16(const uint8_t *p) {
  const int low_byte = p[0];
  const int high_byte = p[1];

  return low_byte | (high_byte << 8);
}
|
|
|
|
|
|
|
|
/* Assembles a 32-bit little-endian value from the four bytes at p
 * (p[0] is the least significant byte, p[3] the most significant).
 *
 * The bytes are combined as unsigned 32-bit quantities: shifting a promoted
 * (signed) int left by 24 is undefined behaviour whenever p[3] >= 0x80,
 * because the result does not fit in int. The final conversion back to int
 * keeps the original return type for callers.
 */
static int read_le32(const uint8_t *p) {
  const uint32_t value = ((uint32_t)p[3] << 24) |
                         ((uint32_t)p[2] << 16) |
                         ((uint32_t)p[1] << 8) |
                         (uint32_t)p[0];

  return (int)value;
}
|
|
|
|
|
|
|
|
// Returns non-zero when the len bytes starting at start lie entirely inside
// [start, end). len == 0 is not allowed and is reported as invalid.
//
// The check compares len against the number of bytes remaining instead of
// forming start + len: that pointer may point past the end of the buffer (or
// wrap), which is undefined behaviour and lets a compiler drop the test.
static int read_is_valid(const unsigned char *start, size_t len,
                         const unsigned char *end) {
  if (len == 0 || start > end)
    return 0;

  return len <= (size_t)(end - start);
}
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/* Maps index v onto a position, given n and modulus.
 *
 * The first `direct_count` indices are sent straight to the positions
 * v * modulus + modulus / 2. Every later index is ranked (v - direct_count)
 * and walked forward to the position that is not congruent to modulus / 2
 * and whose rank, after discounting the reserved positions below it,
 * matches.
 */
static int merge_index(int v, int n, int modulus) {
  const int half = modulus / 2;
  const int direct_count = (n - 1 - half) / modulus + 1;
  int rank;
  int candidate;

  if (v < direct_count)
    return v * modulus + half;

  rank = v - direct_count;
  candidate = rank + (rank + modulus - half) / modulus;

  // Step past reserved positions until the candidate's rank matches.
  while (candidate % modulus == half ||
         rank != candidate - (candidate + modulus - half) / modulus) {
    candidate++;
  }

  return candidate;
}
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
static int inv_remap_prob(int v, int m) {
|
|
|
|
const int n = 256;
|
|
|
|
const int modulus = MODULUS_PARAM;
|
2013-03-14 20:31:54 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
v = merge_index(v, n - 1, modulus);
|
|
|
|
if ((m << 1) <= n) {
|
2013-03-14 20:31:54 +01:00
|
|
|
return vp9_inv_recenter_nonneg(v + 1, m);
|
2012-07-14 00:21:29 +02:00
|
|
|
} else {
|
2013-03-14 20:31:54 +01:00
|
|
|
return n - 1 - vp9_inv_recenter_nonneg(v + 1, n - 1 - m);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2012-04-12 18:24:03 +02:00
|
|
|
}
|
2012-05-03 11:22:26 +02:00
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
/* Decodes one value from bc with
 * vp9_decode_term_subexp(bc, SUBEXP_PARAM, 255) and maps it through
 * inv_remap_prob() against the previous probability oldp. The mapped value
 * is returned as a vp9_prob.
 */
static vp9_prob read_prob_diff_update(vp9_reader *const bc, int oldp) {
  return (vp9_prob)inv_remap_prob(
      vp9_decode_term_subexp(bc, SUBEXP_PARAM, 255), oldp);
}
|
2012-04-12 18:24:03 +02:00
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
/* Fills the Y1dequant and UVdequant tables in pbi->common for every
 * quantizer index in [0, QINDEX_RANGE).
 *
 * Entry 0 of each row receives the DC value (using the y1dc / uvdc delta
 * stored in the common state); the 15 remaining entries, addressed through
 * vp9_default_zig_zag1d_4x4, receive the AC value.
 */
void vp9_init_de_quantizer(VP9D_COMP *pbi) {
  VP9_COMMON *const cm = &pbi->common;
  int qindex;

  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
    int pos;

    // DC coefficient.
    cm->Y1dequant[qindex][0] =
        (int16_t)vp9_dc_quant(qindex, cm->y1dc_delta_q);
    cm->UVdequant[qindex][0] =
        (int16_t)vp9_dc_uv_quant(qindex, cm->uvdc_delta_q);

    // AC coefficients: every position gets the same value.
    for (pos = 1; pos < 16; ++pos) {
      const int rc = vp9_default_zig_zag1d_4x4[pos];

      cm->Y1dequant[qindex][rc] = (int16_t)vp9_ac_yquant(qindex);
      cm->UVdequant[qindex][rc] =
          (int16_t)vp9_ac_uv_quant(qindex, cm->uvac_delta_q);
    }
  }
}
|
|
|
|
|
2013-03-12 01:02:27 +01:00
|
|
|
/* Returns the quantizer index to use for segment_id.
 *
 * Without an active SEG_LVL_ALT_Q feature the frame-level base_qindex is
 * returned unchanged. Otherwise the segment data is either used as an
 * absolute value (SEGMENT_ABSDATA) or added to base_qindex as a delta and
 * clamped to [0, MAXQ].
 */
static int get_qindex(MACROBLOCKD *mb, int segment_id, int base_qindex) {
  if (!vp9_segfeature_active(mb, segment_id, SEG_LVL_ALT_Q))
    return base_qindex;

  // Absolute value.
  if (mb->mb_segment_abs_delta == SEGMENT_ABSDATA)
    return vp9_get_segdata(mb, segment_id, SEG_LVL_ALT_Q);

  // Delta value.
  return clamp(base_qindex + vp9_get_segdata(mb, segment_id, SEG_LVL_ALT_Q),
               0, MAXQ);
}
|
|
|
|
|
|
|
|
/* Prepares the dequantization state of macroblock mb.
 *
 * Looks up the quantizer index for the macroblock's segment, stores it in
 * mb->q_index, points blocks 0..15 at the Y1 dequant row and blocks 16..23
 * at the UV dequant row, and selects the inverse-transform function
 * pointers: the Walsh / lossless variants when mb->lossless is set, the
 * regular IDCT variants otherwise.
 */
static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *mb) {
  VP9_COMMON *const cm = &pbi->common;
  const int seg = mb->mode_info_context->mbmi.segment_id;
  const int q = get_qindex(mb, seg, cm->base_qindex);
  int blk;

  mb->q_index = q;

  for (blk = 0; blk < 24; blk++)
    mb->block[blk].dequant = (blk < 16) ? cm->Y1dequant[q] : cm->UVdequant[q];

  if (!mb->lossless) {
    mb->inv_txm4x4_1      = vp9_short_idct4x4_1;
    mb->inv_txm4x4        = vp9_short_idct4x4;
    mb->itxm_add          = vp9_dequant_idct_add;
    mb->itxm_add_y_block  = vp9_dequant_idct_add_y_block;
    mb->itxm_add_uv_block = vp9_dequant_idct_add_uv_block;
    return;
  }

  // The lossless path is only expected with quantizer index 0.
  assert(q == 0);
  mb->inv_txm4x4_1      = vp9_short_iwalsh4x4_1;
  mb->inv_txm4x4        = vp9_short_iwalsh4x4;
  mb->itxm_add          = vp9_dequant_idct_add_lossless_c;
  mb->itxm_add_y_block  = vp9_dequant_idct_add_y_block_lossless_c;
  mb->itxm_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c;
}
|
|
|
|
|
2010-10-28 01:04:02 +02:00
|
|
|
/* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
|
|
|
|
* to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
|
|
|
|
*/
|
Spatial resampling of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
/* Builds the prediction for a macroblock / superblock.
 *
 * The block size (64x64, 32x32, anything else) selects the predictor
 * builder; within each size, intra blocks use the `_s` intra predictor
 * builders (UV first, then Y) and inter blocks use the inter predictor
 * builder, which is handed the xd->dst buffers and strides together with
 * the block position (mb_row, mb_col).
 *
 * pbi is not referenced by this function.
 */
static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
                          int mb_row, int mb_col) {
  const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
  const int is_intra =
      xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME;

  switch (sb_type) {
    case BLOCK_SIZE_SB64X64:
      if (is_intra) {
        vp9_build_intra_predictors_sb64uv_s(xd);
        vp9_build_intra_predictors_sb64y_s(xd);
      } else {
        vp9_build_inter64x64_predictors_sb(xd,
                                           xd->dst.y_buffer,
                                           xd->dst.u_buffer,
                                           xd->dst.v_buffer,
                                           xd->dst.y_stride,
                                           xd->dst.uv_stride,
                                           mb_row, mb_col);
      }
      break;

    case BLOCK_SIZE_SB32X32:
      if (is_intra) {
        vp9_build_intra_predictors_sbuv_s(xd);
        vp9_build_intra_predictors_sby_s(xd);
      } else {
        vp9_build_inter32x32_predictors_sb(xd,
                                           xd->dst.y_buffer,
                                           xd->dst.u_buffer,
                                           xd->dst.v_buffer,
                                           xd->dst.y_stride,
                                           xd->dst.uv_stride,
                                           mb_row, mb_col);
      }
      break;

    default:
      if (is_intra) {
        vp9_build_intra_predictors_mbuv_s(xd);
        vp9_build_intra_predictors_mby_s(xd);
      } else {
        vp9_build_inter16x16_predictors_mb(xd,
                                           xd->dst.y_buffer,
                                           xd->dst.u_buffer,
                                           xd->dst.v_buffer,
                                           xd->dst.y_stride,
                                           xd->dst.uv_stride,
                                           mb_row, mb_col);
      }
      break;
  }
}
|
|
|
|
|
2012-11-16 00:14:38 +01:00
|
|
|
static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
|
|
|
BOOL_DECODER* const bc) {
|
2013-03-06 00:18:06 +01:00
|
|
|
TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
#if 0 // def DEC_DEBUG
|
2012-11-16 00:14:38 +01:00
|
|
|
if (dec_debug) {
|
|
|
|
int i;
|
|
|
|
printf("\n");
|
|
|
|
printf("qcoeff 16x16\n");
|
|
|
|
for (i = 0; i < 400; i++) {
|
|
|
|
printf("%3d ", xd->qcoeff[i]);
|
|
|
|
if (i % 16 == 15) printf("\n");
|
|
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
printf("predictor\n");
|
|
|
|
for (i = 0; i < 400; i++) {
|
|
|
|
printf("%3d ", xd->predictor[i]);
|
|
|
|
if (i % 16 == 15) printf("\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
if (tx_type != DCT_DCT) {
|
|
|
|
vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff,
|
|
|
|
xd->block[0].dequant, xd->predictor,
|
2012-12-13 00:49:39 +01:00
|
|
|
xd->dst.y_buffer, 16, xd->dst.y_stride,
|
2013-02-27 19:00:24 +01:00
|
|
|
xd->eobs[0]);
|
2012-11-16 00:14:38 +01:00
|
|
|
} else {
|
|
|
|
vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant,
|
|
|
|
xd->predictor, xd->dst.y_buffer,
|
2013-02-27 19:00:24 +01:00
|
|
|
16, xd->dst.y_stride, xd->eobs[0]);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
|
|
|
vp9_dequant_idct_add_uv_block_8x8(
|
|
|
|
xd->qcoeff + 16 * 16, xd->block[16].dequant,
|
|
|
|
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
|
2013-02-21 19:04:40 +01:00
|
|
|
xd->dst.uv_stride, xd);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
|
|
|
BOOL_DECODER* const bc) {
|
|
|
|
// First do Y
|
|
|
|
// if the first one is DCT_DCT assume all the rest are as well
|
2013-03-06 00:18:06 +01:00
|
|
|
TX_TYPE tx_type = get_tx_type_8x8(xd, 0);
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
#if 0 // def DEC_DEBUG
|
2012-11-16 00:14:38 +01:00
|
|
|
if (dec_debug) {
|
|
|
|
int i;
|
|
|
|
printf("\n");
|
|
|
|
printf("qcoeff 8x8\n");
|
2013-02-20 19:16:24 +01:00
|
|
|
for (i = 0; i < 384; i++) {
|
2012-11-16 00:14:38 +01:00
|
|
|
printf("%3d ", xd->qcoeff[i]);
|
|
|
|
if (i % 16 == 15) printf("\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
if (tx_type != DCT_DCT || xd->mode_info_context->mbmi.mode == I8X8_PRED) {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
int ib = vp9_i8x8_block[i];
|
|
|
|
int idx = (ib & 0x02) ? (ib + 2) : ib;
|
2012-12-19 00:31:19 +01:00
|
|
|
int16_t *q = xd->block[idx].qcoeff;
|
|
|
|
int16_t *dq = xd->block[0].dequant;
|
|
|
|
uint8_t *pre = xd->block[ib].predictor;
|
|
|
|
uint8_t *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst;
|
2012-11-16 00:14:38 +01:00
|
|
|
int stride = xd->dst.y_stride;
|
|
|
|
BLOCKD *b = &xd->block[ib];
|
|
|
|
if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
|
|
|
|
int i8x8mode = b->bmi.as_mode.first;
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independendly encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Resuls: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
2013-03-06 00:18:06 +01:00
|
|
|
tx_type = get_tx_type_8x8(xd, ib);
|
2012-11-16 00:14:38 +01:00
|
|
|
if (tx_type != DCT_DCT) {
|
2012-12-13 00:49:39 +01:00
|
|
|
vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride,
|
2013-02-27 19:00:24 +01:00
|
|
|
xd->eobs[idx]);
|
2012-11-16 00:14:38 +01:00
|
|
|
} else {
|
|
|
|
vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride,
|
2013-02-27 19:00:24 +01:00
|
|
|
xd->eobs[idx]);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
|
|
|
}
|
2013-02-15 19:15:42 +01:00
|
|
|
} else {
|
2012-11-16 00:14:38 +01:00
|
|
|
vp9_dequant_idct_add_y_block_8x8(xd->qcoeff,
|
|
|
|
xd->block[0].dequant,
|
|
|
|
xd->predictor,
|
|
|
|
xd->dst.y_buffer,
|
|
|
|
xd->dst.y_stride,
|
2013-02-21 19:04:40 +01:00
|
|
|
xd);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Now do UV
|
|
|
|
if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
int ib = vp9_i8x8_block[i];
|
|
|
|
BLOCKD *b = &xd->block[ib];
|
|
|
|
int i8x8mode = b->bmi.as_mode.first;
|
2013-03-14 20:31:54 +01:00
|
|
|
|
2012-11-16 00:14:38 +01:00
|
|
|
b = &xd->block[16 + i];
|
2013-03-14 20:31:54 +01:00
|
|
|
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
2013-02-28 22:01:41 +01:00
|
|
|
*(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]);
|
2013-03-14 20:31:54 +01:00
|
|
|
|
2012-11-16 00:14:38 +01:00
|
|
|
b = &xd->block[20 + i];
|
2013-03-14 20:31:54 +01:00
|
|
|
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
2013-02-28 22:01:41 +01:00
|
|
|
*(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
|
|
|
} else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
|
2012-11-16 00:14:38 +01:00
|
|
|
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
|
2013-02-21 19:04:40 +01:00
|
|
|
xd->dst.uv_stride, xd);
|
2012-11-16 00:14:38 +01:00
|
|
|
} else {
|
|
|
|
vp9_dequant_idct_add_uv_block_8x8
|
|
|
|
(xd->qcoeff + 16 * 16, xd->block[16].dequant,
|
|
|
|
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
|
2013-02-21 19:04:40 +01:00
|
|
|
xd->dst.uv_stride, xd);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
#if 0 // def DEC_DEBUG
|
2012-11-16 00:14:38 +01:00
|
|
|
if (dec_debug) {
|
|
|
|
int i;
|
|
|
|
printf("\n");
|
|
|
|
printf("predictor\n");
|
|
|
|
for (i = 0; i < 384; i++) {
|
|
|
|
printf("%3d ", xd->predictor[i]);
|
|
|
|
if (i % 16 == 15) printf("\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
|
|
|
BOOL_DECODER* const bc) {
|
|
|
|
TX_TYPE tx_type;
|
|
|
|
int i, eobtotal = 0;
|
|
|
|
MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
#if 0 // def DEC_DEBUG
|
|
|
|
if (dec_debug) {
|
|
|
|
int i;
|
|
|
|
printf("\n");
|
|
|
|
printf("predictor\n");
|
|
|
|
for (i = 0; i < 384; i++) {
|
|
|
|
printf("%3d ", xd->predictor[i]);
|
|
|
|
if (i % 16 == 15) printf("\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
2012-11-16 00:14:38 +01:00
|
|
|
if (mode == I8X8_PRED) {
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
int ib = vp9_i8x8_block[i];
|
|
|
|
const int iblock[4] = {0, 1, 4, 5};
|
|
|
|
int j;
|
2013-03-12 01:02:27 +01:00
|
|
|
BLOCKD *b = &xd->block[ib];
|
|
|
|
int i8x8mode = b->bmi.as_mode.first;
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independendly encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Resuls: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor);
|
2012-11-16 00:14:38 +01:00
|
|
|
for (j = 0; j < 4; j++) {
|
|
|
|
b = &xd->block[ib + iblock[j]];
|
2013-03-06 00:18:06 +01:00
|
|
|
tx_type = get_tx_type_4x4(xd, ib + iblock[j]);
|
2012-11-16 00:14:38 +01:00
|
|
|
if (tx_type != DCT_DCT) {
|
|
|
|
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
|
|
|
|
b->dequant, b->predictor,
|
|
|
|
*(b->base_dst) + b->dst, 16,
|
2013-02-27 19:00:24 +01:00
|
|
|
b->dst_stride, xd->eobs[ib + iblock[j]]);
|
2012-11-16 00:14:38 +01:00
|
|
|
} else {
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
2013-02-28 22:01:41 +01:00
|
|
|
*(b->base_dst) + b->dst, 16, b->dst_stride,
|
|
|
|
xd->eobs[ib + iblock[j]]);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
b = &xd->block[16 + i];
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independendly encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Resuls: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
2013-02-28 22:01:41 +01:00
|
|
|
*(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]);
|
2012-11-16 00:14:38 +01:00
|
|
|
b = &xd->block[20 + i];
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independendly encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Resuls: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
2013-02-28 22:01:41 +01:00
|
|
|
*(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
|
|
|
} else if (mode == B_PRED) {
|
|
|
|
for (i = 0; i < 16; i++) {
|
|
|
|
BLOCKD *b = &xd->block[i];
|
2013-03-12 01:02:27 +01:00
|
|
|
int b_mode = xd->mode_info_context->bmi[i].as_mode.first;
|
2012-11-16 00:14:38 +01:00
|
|
|
#if CONFIG_NEWBINTRAMODES
|
|
|
|
xd->mode_info_context->bmi[i].as_mode.context = b->bmi.as_mode.context =
|
2013-03-16 17:26:52 +01:00
|
|
|
vp9_find_bpred_context(xd, b);
|
2012-11-16 00:14:38 +01:00
|
|
|
#endif
|
|
|
|
if (!xd->mode_info_context->mbmi.mb_skip_coeff)
|
|
|
|
eobtotal += vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i);
|
|
|
|
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
vp9_intra4x4_predict(xd, b, b_mode, b->predictor);
|
2013-03-06 00:18:06 +01:00
|
|
|
tx_type = get_tx_type_4x4(xd, i);
|
2012-11-16 00:14:38 +01:00
|
|
|
if (tx_type != DCT_DCT) {
|
|
|
|
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
|
|
|
|
b->dequant, b->predictor,
|
2012-12-13 00:49:39 +01:00
|
|
|
*(b->base_dst) + b->dst, 16, b->dst_stride,
|
2013-02-27 19:00:24 +01:00
|
|
|
xd->eobs[i]);
|
2012-11-16 00:14:38 +01:00
|
|
|
} else {
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
2013-02-28 22:01:41 +01:00
|
|
|
*(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!xd->mode_info_context->mbmi.mb_skip_coeff) {
|
|
|
|
vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc);
|
|
|
|
}
|
|
|
|
vp9_build_intra_predictors_mbuv(xd);
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
|
2012-11-16 00:14:38 +01:00
|
|
|
xd->block[16].dequant,
|
|
|
|
xd->predictor + 16 * 16,
|
|
|
|
xd->dst.u_buffer,
|
|
|
|
xd->dst.v_buffer,
|
|
|
|
xd->dst.uv_stride,
|
2013-02-21 19:04:40 +01:00
|
|
|
xd);
|
2013-03-06 00:18:06 +01:00
|
|
|
} else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) {
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->itxm_add_y_block(xd->qcoeff,
|
2012-11-16 00:14:38 +01:00
|
|
|
xd->block[0].dequant,
|
|
|
|
xd->predictor,
|
|
|
|
xd->dst.y_buffer,
|
|
|
|
xd->dst.y_stride,
|
2013-02-21 19:04:40 +01:00
|
|
|
xd);
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
|
2012-11-16 00:14:38 +01:00
|
|
|
xd->block[16].dequant,
|
|
|
|
xd->predictor + 16 * 16,
|
|
|
|
xd->dst.u_buffer,
|
|
|
|
xd->dst.v_buffer,
|
|
|
|
xd->dst.uv_stride,
|
2013-02-21 19:04:40 +01:00
|
|
|
xd);
|
2012-11-16 00:14:38 +01:00
|
|
|
} else {
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
#if 0 // def DEC_DEBUG
|
2012-11-16 00:14:38 +01:00
|
|
|
if (dec_debug) {
|
|
|
|
int i;
|
|
|
|
printf("\n");
|
|
|
|
printf("qcoeff 4x4\n");
|
|
|
|
for (i = 0; i < 400; i++) {
|
|
|
|
printf("%3d ", xd->qcoeff[i]);
|
|
|
|
if (i % 16 == 15) printf("\n");
|
|
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
printf("predictor\n");
|
|
|
|
for (i = 0; i < 400; i++) {
|
|
|
|
printf("%3d ", xd->predictor[i]);
|
|
|
|
if (i % 16 == 15) printf("\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
2013-02-15 19:15:42 +01:00
|
|
|
for (i = 0; i < 16; i++) {
|
|
|
|
BLOCKD *b = &xd->block[i];
|
2013-03-06 00:18:06 +01:00
|
|
|
tx_type = get_tx_type_4x4(xd, i);
|
2013-02-15 19:15:42 +01:00
|
|
|
if (tx_type != DCT_DCT) {
|
|
|
|
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
|
|
|
|
b->dequant, b->predictor,
|
|
|
|
*(b->base_dst) + b->dst, 16,
|
2013-02-27 19:00:24 +01:00
|
|
|
b->dst_stride, xd->eobs[i]);
|
2012-11-16 00:14:38 +01:00
|
|
|
} else {
|
2013-02-15 19:15:42 +01:00
|
|
|
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
|
2013-02-28 22:01:41 +01:00
|
|
|
*(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
|
|
|
}
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
|
2012-11-16 00:14:38 +01:00
|
|
|
xd->block[16].dequant,
|
|
|
|
xd->predictor + 16 * 16,
|
|
|
|
xd->dst.u_buffer,
|
|
|
|
xd->dst.v_buffer,
|
|
|
|
xd->dst.uv_stride,
|
2013-02-21 19:04:40 +01:00
|
|
|
xd);
|
2012-11-16 00:14:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
2013-01-18 18:44:23 +01:00
|
|
|
int mb_row, int mb_col,
|
2013-01-06 03:20:25 +01:00
|
|
|
BOOL_DECODER* const bc) {
|
2013-02-27 19:00:24 +01:00
|
|
|
int n, eobtotal;
|
2012-11-08 20:03:00 +01:00
|
|
|
VP9_COMMON *const pc = &pbi->common;
|
2013-03-04 23:12:17 +01:00
|
|
|
MODE_INFO *mi = xd->mode_info_context;
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
const int mis = pc->mode_info_stride;
|
2012-11-08 20:03:00 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64);
|
|
|
|
|
|
|
|
if (pbi->common.frame_type != KEY_FRAME)
|
|
|
|
vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc);
|
|
|
|
|
|
|
|
// re-initialize macroblock dequantizer before detokenization
|
|
|
|
if (xd->segmentation_enabled)
|
|
|
|
mb_init_dequantizer(pbi, xd);
|
|
|
|
|
|
|
|
if (xd->mode_info_context->mbmi.mb_skip_coeff) {
|
2013-03-04 23:12:17 +01:00
|
|
|
vp9_reset_sb64_tokens_context(xd);
|
2013-01-06 03:20:25 +01:00
|
|
|
|
|
|
|
/* Special case: Force the loopfilter to skip when eobtotal and
|
|
|
|
* mb_skip_coeff are zero.
|
|
|
|
*/
|
Spatial resamping of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
skip_recon_mb(pbi, xd, mb_row, mb_col);
|
2013-01-06 03:20:25 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* do prediction */
|
|
|
|
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
|
|
|
|
vp9_build_intra_predictors_sb64y_s(xd);
|
|
|
|
vp9_build_intra_predictors_sb64uv_s(xd);
|
|
|
|
} else {
|
|
|
|
vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer,
|
|
|
|
xd->dst.u_buffer, xd->dst.v_buffer,
|
Spatial resamping of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
xd->dst.y_stride, xd->dst.uv_stride,
|
|
|
|
mb_row, mb_col);
|
2013-01-06 03:20:25 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* dequantization and idct */
|
2013-03-04 23:12:17 +01:00
|
|
|
eobtotal = vp9_decode_sb64_tokens(pbi, xd, bc);
|
|
|
|
if (eobtotal == 0) { // skip loopfilter
|
2013-01-06 03:20:25 +01:00
|
|
|
for (n = 0; n < 16; n++) {
|
2013-03-04 23:12:17 +01:00
|
|
|
const int x_idx = n & 3, y_idx = n >> 2;
|
2013-01-06 03:20:25 +01:00
|
|
|
|
2013-03-04 23:12:17 +01:00
|
|
|
if (mb_col + x_idx < pc->mb_cols && mb_row + y_idx < pc->mb_rows)
|
|
|
|
mi[y_idx * mis + x_idx].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
switch (xd->mode_info_context->mbmi.txfm_size) {
|
|
|
|
case TX_32X32:
|
|
|
|
for (n = 0; n < 4; n++) {
|
|
|
|
const int x_idx = n & 1, y_idx = n >> 1;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int y_offset = x_idx * 32 + y_idx * xd->dst.y_stride * 32;
|
2013-03-04 23:12:17 +01:00
|
|
|
vp9_dequant_idct_add_32x32(xd->qcoeff + n * 1024,
|
|
|
|
xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 64]);
|
|
|
|
}
|
|
|
|
vp9_dequant_idct_add_32x32(xd->qcoeff + 4096,
|
|
|
|
xd->block[16].dequant, xd->dst.u_buffer, xd->dst.u_buffer,
|
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256]);
|
|
|
|
vp9_dequant_idct_add_32x32(xd->qcoeff + 4096 + 1024,
|
|
|
|
xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer,
|
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]);
|
|
|
|
break;
|
2013-03-06 00:18:06 +01:00
|
|
|
case TX_16X16:
|
2013-03-04 23:12:17 +01:00
|
|
|
for (n = 0; n < 16; n++) {
|
|
|
|
const int x_idx = n & 3, y_idx = n >> 2;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int y_offset = y_idx * 16 * xd->dst.y_stride + x_idx * 16;
|
2013-03-06 00:18:06 +01:00
|
|
|
const TX_TYPE tx_type = get_tx_type_16x16(xd,
|
|
|
|
(y_idx * 16 + x_idx) * 4);
|
2013-03-12 01:02:27 +01:00
|
|
|
|
2013-03-06 00:18:06 +01:00
|
|
|
if (tx_type == DCT_DCT) {
|
|
|
|
vp9_dequant_idct_add_16x16(xd->qcoeff + n * 256,
|
|
|
|
xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
|
|
|
|
} else {
|
|
|
|
vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256,
|
|
|
|
xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
|
|
|
|
}
|
2013-03-04 23:12:17 +01:00
|
|
|
}
|
|
|
|
for (n = 0; n < 4; n++) {
|
|
|
|
const int x_idx = n & 1, y_idx = n >> 1;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int uv_offset = y_idx * 16 * xd->dst.uv_stride + x_idx * 16;
|
2013-03-04 23:12:17 +01:00
|
|
|
vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + n * 256,
|
|
|
|
xd->block[16].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.u_buffer + uv_offset,
|
|
|
|
xd->dst.u_buffer + uv_offset,
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 16]);
|
|
|
|
vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + 1024 + n * 256,
|
|
|
|
xd->block[20].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.v_buffer + uv_offset,
|
|
|
|
xd->dst.v_buffer + uv_offset,
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 16]);
|
|
|
|
}
|
|
|
|
break;
|
2013-03-06 00:18:06 +01:00
|
|
|
case TX_8X8:
|
2013-03-04 23:12:17 +01:00
|
|
|
for (n = 0; n < 64; n++) {
|
|
|
|
const int x_idx = n & 7, y_idx = n >> 3;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int y_offset = y_idx * 8 * xd->dst.y_stride + x_idx * 8;
|
2013-03-06 00:18:06 +01:00
|
|
|
const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
|
|
|
|
if (tx_type == DCT_DCT) {
|
|
|
|
vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64,
|
|
|
|
xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
|
|
|
|
} else {
|
|
|
|
vp9_ht_dequant_idct_add_8x8_c(tx_type, xd->qcoeff + n * 64,
|
|
|
|
xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
|
|
|
|
}
|
2013-03-04 23:12:17 +01:00
|
|
|
}
|
|
|
|
for (n = 0; n < 16; n++) {
|
|
|
|
const int x_idx = n & 3, y_idx = n >> 2;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int uv_offset = y_idx * 8 * xd->dst.uv_stride + x_idx * 8;
|
2013-03-04 23:12:17 +01:00
|
|
|
vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096,
|
|
|
|
xd->block[16].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.u_buffer + uv_offset,
|
|
|
|
xd->dst.u_buffer + uv_offset,
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 4]);
|
|
|
|
vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096 + 1024,
|
|
|
|
xd->block[20].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.v_buffer + uv_offset,
|
|
|
|
xd->dst.v_buffer + uv_offset,
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 4]);
|
|
|
|
}
|
|
|
|
break;
|
2013-03-06 00:18:06 +01:00
|
|
|
case TX_4X4:
|
2013-03-04 23:12:17 +01:00
|
|
|
for (n = 0; n < 256; n++) {
|
|
|
|
const int x_idx = n & 15, y_idx = n >> 4;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int y_offset = y_idx * 4 * xd->dst.y_stride + x_idx * 4;
|
2013-03-06 00:18:06 +01:00
|
|
|
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
|
|
|
|
if (tx_type == DCT_DCT) {
|
|
|
|
xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
|
|
|
|
} else {
|
|
|
|
vp9_ht_dequant_idct_add_c(tx_type, xd->qcoeff + n * 16,
|
|
|
|
xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
|
|
|
|
}
|
2013-03-04 23:12:17 +01:00
|
|
|
}
|
|
|
|
for (n = 0; n < 64; n++) {
|
|
|
|
const int x_idx = n & 7, y_idx = n >> 3;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int uv_offset = y_idx * 4 * xd->dst.uv_stride + x_idx * 4;
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->itxm_add(xd->qcoeff + 4096 + n * 16,
|
|
|
|
xd->block[16].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.u_buffer + uv_offset,
|
|
|
|
xd->dst.u_buffer + uv_offset,
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n]);
|
|
|
|
xd->itxm_add(xd->qcoeff + 4096 + 1024 + n * 16,
|
|
|
|
xd->block[20].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.v_buffer + uv_offset,
|
|
|
|
xd->dst.v_buffer + uv_offset,
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n]);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default: assert(0);
|
2013-01-06 03:20:25 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
2013-01-18 18:44:23 +01:00
|
|
|
int mb_row, int mb_col,
|
2013-01-06 03:20:25 +01:00
|
|
|
BOOL_DECODER* const bc) {
|
2013-02-27 19:00:24 +01:00
|
|
|
int n, eobtotal;
|
2013-01-06 03:20:25 +01:00
|
|
|
VP9_COMMON *const pc = &pbi->common;
|
|
|
|
const int mis = pc->mode_info_stride;
|
|
|
|
|
|
|
|
assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32);
|
2012-11-08 20:03:00 +01:00
|
|
|
|
2012-11-30 01:39:15 +01:00
|
|
|
if (pbi->common.frame_type != KEY_FRAME)
|
|
|
|
vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc);
|
|
|
|
|
2012-11-08 20:03:00 +01:00
|
|
|
// re-initialize macroblock dequantizer before detokenization
|
|
|
|
if (xd->segmentation_enabled)
|
|
|
|
mb_init_dequantizer(pbi, xd);
|
|
|
|
|
|
|
|
if (xd->mode_info_context->mbmi.mb_skip_coeff) {
|
2013-03-04 23:12:17 +01:00
|
|
|
vp9_reset_sb_tokens_context(xd);
|
2012-11-08 20:03:00 +01:00
|
|
|
|
|
|
|
/* Special case: Force the loopfilter to skip when eobtotal and
|
|
|
|
* mb_skip_coeff are zero.
|
|
|
|
*/
|
Spatial resampling of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
skip_recon_mb(pbi, xd, mb_row, mb_col);
|
2012-11-08 20:03:00 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* do prediction */
|
|
|
|
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
|
|
|
|
vp9_build_intra_predictors_sby_s(xd);
|
|
|
|
vp9_build_intra_predictors_sbuv_s(xd);
|
|
|
|
} else {
|
|
|
|
vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
|
|
|
|
xd->dst.u_buffer, xd->dst.v_buffer,
|
Spatial resampling of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
xd->dst.y_stride, xd->dst.uv_stride,
|
|
|
|
mb_row, mb_col);
|
2012-11-08 20:03:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* dequantization and idct */
|
2013-03-04 23:12:17 +01:00
|
|
|
eobtotal = vp9_decode_sb_tokens(pbi, xd, bc);
|
|
|
|
if (eobtotal == 0) { // skip loopfilter
|
|
|
|
xd->mode_info_context->mbmi.mb_skip_coeff = 1;
|
|
|
|
if (mb_col + 1 < pc->mb_cols)
|
|
|
|
xd->mode_info_context[1].mbmi.mb_skip_coeff = 1;
|
|
|
|
if (mb_row + 1 < pc->mb_rows) {
|
|
|
|
xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1;
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
if (mb_col + 1 < pc->mb_cols)
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1;
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
}
|
2013-01-10 17:23:59 +01:00
|
|
|
} else {
|
2013-03-04 23:12:17 +01:00
|
|
|
switch (xd->mode_info_context->mbmi.txfm_size) {
|
|
|
|
case TX_32X32:
|
|
|
|
vp9_dequant_idct_add_32x32(xd->qcoeff, xd->block[0].dequant,
|
|
|
|
xd->dst.y_buffer, xd->dst.y_buffer,
|
|
|
|
xd->dst.y_stride, xd->dst.y_stride,
|
|
|
|
xd->eobs[0]);
|
|
|
|
vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024,
|
|
|
|
xd->block[16].dequant,
|
|
|
|
xd->dst.u_buffer,
|
|
|
|
xd->dst.v_buffer,
|
|
|
|
xd->dst.uv_stride, xd);
|
|
|
|
break;
|
2013-03-06 00:18:06 +01:00
|
|
|
case TX_16X16:
|
2013-03-04 23:12:17 +01:00
|
|
|
for (n = 0; n < 4; n++) {
|
|
|
|
const int x_idx = n & 1, y_idx = n >> 1;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int y_offset = y_idx * 16 * xd->dst.y_stride + x_idx * 16;
|
2013-03-06 00:18:06 +01:00
|
|
|
const TX_TYPE tx_type = get_tx_type_16x16(xd,
|
|
|
|
(y_idx * 8 + x_idx) * 4);
|
|
|
|
if (tx_type == DCT_DCT) {
|
|
|
|
vp9_dequant_idct_add_16x16(
|
|
|
|
xd->qcoeff + n * 256, xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
|
|
|
|
} else {
|
|
|
|
vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256,
|
|
|
|
xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
|
|
|
|
}
|
2013-03-04 23:12:17 +01:00
|
|
|
}
|
|
|
|
vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024,
|
|
|
|
xd->block[16].dequant,
|
|
|
|
xd->dst.u_buffer,
|
|
|
|
xd->dst.v_buffer,
|
|
|
|
xd->dst.uv_stride, xd);
|
|
|
|
break;
|
2013-03-06 00:18:06 +01:00
|
|
|
case TX_8X8:
|
2013-03-04 23:12:17 +01:00
|
|
|
for (n = 0; n < 16; n++) {
|
|
|
|
const int x_idx = n & 3, y_idx = n >> 2;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int y_offset = y_idx * 8 * xd->dst.y_stride + x_idx * 8;
|
2013-03-06 00:18:06 +01:00
|
|
|
const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
|
|
|
|
if (tx_type == DCT_DCT) {
|
|
|
|
vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64,
|
|
|
|
xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
|
|
|
|
} else {
|
|
|
|
vp9_ht_dequant_idct_add_8x8_c(tx_type, xd->qcoeff + n * 64,
|
|
|
|
xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
|
|
|
|
}
|
2013-03-04 23:12:17 +01:00
|
|
|
}
|
|
|
|
for (n = 0; n < 4; n++) {
|
|
|
|
const int x_idx = n & 1, y_idx = n >> 1;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int uv_offset = y_idx * 8 * xd->dst.uv_stride + x_idx * 8;
|
2013-03-04 23:12:17 +01:00
|
|
|
vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1024,
|
|
|
|
xd->block[16].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.u_buffer + uv_offset,
|
|
|
|
xd->dst.u_buffer + uv_offset,
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n * 4]);
|
|
|
|
vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1280,
|
|
|
|
xd->block[20].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.v_buffer + uv_offset,
|
|
|
|
xd->dst.v_buffer + uv_offset,
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n * 4]);
|
|
|
|
}
|
|
|
|
break;
|
2013-03-06 00:18:06 +01:00
|
|
|
case TX_4X4:
|
2013-03-04 23:12:17 +01:00
|
|
|
for (n = 0; n < 64; n++) {
|
|
|
|
const int x_idx = n & 7, y_idx = n >> 3;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int y_offset = y_idx * 4 * xd->dst.y_stride + x_idx * 4;
|
|
|
|
|
2013-03-06 00:18:06 +01:00
|
|
|
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
|
|
|
|
if (tx_type == DCT_DCT) {
|
|
|
|
xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
|
|
|
|
} else {
|
|
|
|
vp9_ht_dequant_idct_add_c(tx_type, xd->qcoeff + n * 16,
|
|
|
|
xd->block[0].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.y_buffer + y_offset,
|
|
|
|
xd->dst.y_buffer + y_offset,
|
2013-03-06 00:18:06 +01:00
|
|
|
xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
|
|
|
|
}
|
2013-03-04 23:12:17 +01:00
|
|
|
}
|
2013-03-12 01:02:27 +01:00
|
|
|
|
2013-03-04 23:12:17 +01:00
|
|
|
for (n = 0; n < 16; n++) {
|
|
|
|
const int x_idx = n & 3, y_idx = n >> 2;
|
2013-03-12 01:02:27 +01:00
|
|
|
const int uv_offset = y_idx * 4 * xd->dst.uv_stride + x_idx * 4;
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->itxm_add(xd->qcoeff + 1024 + n * 16,
|
|
|
|
xd->block[16].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.u_buffer + uv_offset,
|
|
|
|
xd->dst.u_buffer + uv_offset,
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n]);
|
|
|
|
xd->itxm_add(xd->qcoeff + 1280 + n * 16,
|
|
|
|
xd->block[20].dequant,
|
2013-03-12 01:02:27 +01:00
|
|
|
xd->dst.v_buffer + uv_offset,
|
|
|
|
xd->dst.v_buffer + uv_offset,
|
2013-03-04 23:12:17 +01:00
|
|
|
xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n]);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default: assert(0);
|
2012-11-08 20:03:00 +01:00
|
|
|
}
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT are double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
}
|
2012-11-08 20:03:00 +01:00
|
|
|
}
|
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
|
2012-10-30 01:58:18 +01:00
|
|
|
int mb_row, unsigned int mb_col,
|
2012-10-18 01:47:38 +02:00
|
|
|
BOOL_DECODER* const bc) {
|
2012-07-14 00:21:29 +02:00
|
|
|
int eobtotal = 0;
|
|
|
|
MB_PREDICTION_MODE mode;
|
2012-10-16 01:41:41 +02:00
|
|
|
int tx_size;
|
2012-11-08 20:03:00 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
assert(!xd->mode_info_context->mbmi.sb_type);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2012-08-16 22:25:49 +02:00
|
|
|
// re-initialize macroblock dequantizer before detokenization
|
|
|
|
if (xd->segmentation_enabled)
|
|
|
|
mb_init_dequantizer(pbi, xd);
|
|
|
|
|
2012-10-16 01:41:41 +02:00
|
|
|
tx_size = xd->mode_info_context->mbmi.txfm_size;
|
2012-09-12 04:36:28 +02:00
|
|
|
mode = xd->mode_info_context->mbmi.mode;
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
if (xd->mode_info_context->mbmi.mb_skip_coeff) {
|
2012-10-30 22:51:31 +01:00
|
|
|
vp9_reset_mb_tokens_context(xd);
|
2012-10-31 22:40:53 +01:00
|
|
|
} else if (!bool_error(bc)) {
|
2013-03-14 20:31:54 +01:00
|
|
|
if (mode != B_PRED)
|
2012-11-23 20:23:50 +01:00
|
|
|
eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2010-05-28 20:28:12 +02:00
|
|
|
|
2012-09-12 04:36:28 +02:00
|
|
|
//mode = xd->mode_info_context->mbmi.mode;
|
2012-07-18 22:43:01 +02:00
|
|
|
if (pbi->common.frame_type != KEY_FRAME)
|
2012-10-31 00:25:53 +01:00
|
|
|
vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter,
|
2012-07-18 22:43:01 +02:00
|
|
|
&pbi->common);
|
2011-06-28 16:22:13 +02:00
|
|
|
|
2013-02-23 02:27:34 +01:00
|
|
|
if (eobtotal == 0 &&
|
|
|
|
mode != B_PRED &&
|
|
|
|
mode != SPLITMV &&
|
|
|
|
mode != I8X8_PRED &&
|
|
|
|
!bool_error(bc)) {
|
2012-07-14 00:21:29 +02:00
|
|
|
/* Special case: Force the loopfilter to skip when eobtotal and
|
2013-02-23 02:27:34 +01:00
|
|
|
mb_skip_coeff are zero. */
|
2012-07-14 00:21:29 +02:00
|
|
|
xd->mode_info_context->mbmi.mb_skip_coeff = 1;
|
Spatial resamping of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
skip_recon_mb(pbi, xd, mb_row, mb_col);
|
2012-11-08 20:03:00 +01:00
|
|
|
return;
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
#if 0 // def DEC_DEBUG
|
2012-11-16 00:14:38 +01:00
|
|
|
if (dec_debug)
|
|
|
|
printf("Decoding mb: %d %d\n", xd->mode_info_context->mbmi.mode, tx_size);
|
|
|
|
#endif
|
2011-07-20 23:21:24 +02:00
|
|
|
|
2012-08-16 22:25:49 +02:00
|
|
|
// moved to be performed before detokenization
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
// if (xd->segmentation_enabled)
|
|
|
|
// mb_init_dequantizer(pbi, xd);
|
2010-05-28 20:28:12 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/* do prediction */
|
|
|
|
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
|
|
|
|
if (mode != I8X8_PRED) {
|
2012-11-17 07:26:12 +01:00
|
|
|
vp9_build_intra_predictors_mbuv(xd);
|
2012-07-14 00:21:29 +02:00
|
|
|
if (mode != B_PRED) {
|
2012-10-31 00:25:53 +01:00
|
|
|
vp9_build_intra_predictors_mby(xd);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
} else {
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
#if 0 // def DEC_DEBUG
|
2012-11-30 01:39:15 +01:00
|
|
|
if (dec_debug)
|
|
|
|
printf("Decoding mb: %d %d interp %d\n",
|
|
|
|
xd->mode_info_context->mbmi.mode, tx_size,
|
|
|
|
xd->mode_info_context->mbmi.interp_filter);
|
|
|
|
#endif
|
Spatial resamping of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
vp9_build_inter_predictors_mb(xd, mb_row, mb_col);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
|
2012-11-16 00:14:38 +01:00
|
|
|
if (tx_size == TX_16X16) {
|
|
|
|
decode_16x16(pbi, xd, bc);
|
|
|
|
} else if (tx_size == TX_8X8) {
|
|
|
|
decode_8x8(pbi, xd, bc);
|
|
|
|
} else {
|
|
|
|
decode_4x4(pbi, xd, bc);
|
|
|
|
}
|
|
|
|
#ifdef DEC_DEBUG
|
|
|
|
if (dec_debug) {
|
|
|
|
int i, j;
|
|
|
|
printf("\n");
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
printf("predictor y\n");
|
|
|
|
for (i = 0; i < 16; i++) {
|
|
|
|
for (j = 0; j < 16; j++)
|
|
|
|
printf("%3d ", xd->predictor[i * 16 + j]);
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
printf("\n");
|
2012-11-16 00:14:38 +01:00
|
|
|
printf("final y\n");
|
2012-07-14 00:21:29 +02:00
|
|
|
for (i = 0; i < 16; i++) {
|
2012-11-16 00:14:38 +01:00
|
|
|
for (j = 0; j < 16; j++)
|
|
|
|
printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]);
|
|
|
|
printf("\n");
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-11-16 00:14:38 +01:00
|
|
|
printf("\n");
|
|
|
|
printf("final u\n");
|
|
|
|
for (i = 0; i < 8; i++) {
|
|
|
|
for (j = 0; j < 8; j++)
|
|
|
|
printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]);
|
|
|
|
printf("\n");
|
2012-10-22 20:49:00 +02:00
|
|
|
}
|
2012-11-16 00:14:38 +01:00
|
|
|
printf("\n");
|
|
|
|
printf("final v\n");
|
|
|
|
for (i = 0; i < 8; i++) {
|
|
|
|
for (j = 0; j < 8; j++)
|
|
|
|
printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]);
|
|
|
|
printf("\n");
|
2011-02-14 23:18:18 +01:00
|
|
|
}
|
2012-11-16 00:14:38 +01:00
|
|
|
fflush(stdout);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2012-11-16 00:14:38 +01:00
|
|
|
#endif
|
2011-07-20 23:21:24 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2010-09-16 20:08:52 +02:00
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
/* Reads an optional signed quantizer delta from the bitstream.
 *
 * A leading flag bit says whether a delta is present. When it is, a 4-bit
 * magnitude follows and then one sign bit (set means negative). With no
 * delta present the result is 0.
 *
 *   bc        bit reader
 *   prev      delta currently in effect
 *   q_update  set to 1 when the value read differs from prev; otherwise
 *             left untouched
 *
 * Returns the delta that was read.
 */
static int get_delta_q(vp9_reader *bc, int prev, int *q_update) {
  int delta = 0;

  if (vp9_read_bit(bc)) {
    const int magnitude = vp9_read_literal(bc, 4);

    delta = vp9_read_bit(bc) ? -magnitude : magnitude;
  }

  // A changed delta-q means the quantizer has to be updated.
  if (delta != prev) {
    *q_update = 1;
  }

  return delta;
}
|
|
|
|
|
|
|
|
#ifdef PACKET_TESTING
#include <stdio.h>
/* Only compiled when PACKET_TESTING is defined; starts out as a null handle. */
FILE *vpxlog = NULL;
#endif
|
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
/* Points the decoder's macroblock state at the block at (mb_row, mb_col).
 *
 * Sets the current and previous mode-info pointers, the above/left entropy
 * context pointers and the destination plane pointers inside the frame
 * buffer selected by cm->new_fb_idx. The block's sb_type is written as
 * block_size >> 5.
 *
 *   pbi         decoder instance
 *   block_size  block edge length in pixels (callers pass 16, 32 or 64)
 *   mb_row      macroblock row of the block's top-left corner
 *   mb_col      macroblock column of the block's top-left corner
 */
static void set_offsets(VP9D_COMP *pbi, int block_size,
                        int mb_row, int mb_col) {
  VP9_COMMON *const cm = &pbi->common;
  MACROBLOCKD *const xd = &pbi->mb;
  const int mi_idx = cm->mode_info_stride * mb_row + mb_col;
  const int fb_idx = cm->new_fb_idx;
  const int y_stride = cm->yv12_fb[fb_idx].y_stride;
  const int uv_stride = cm->yv12_fb[fb_idx].uv_stride;
  // A macroblock covers 16x16 luma samples and 8x8 samples per chroma plane.
  const int y_offset = mb_row * 16 * y_stride + 16 * mb_col;
  const int uv_offset = mb_row * 8 * uv_stride + 8 * mb_col;
  const int size_in_mbs = block_size >> 4;

  xd->mode_info_context = cm->mi + mi_idx;
  xd->mode_info_context->mbmi.sb_type = block_size >> 5;
  xd->prev_mode_info_context = cm->prev_mi + mi_idx;
  xd->above_context = cm->above_context + mb_col;
  xd->left_context = cm->left_context + (mb_row & 3);

  // Presumably these record the block's distance to the image edges in
  // 1/8th pel units (per the original note) -- confirm against the helpers.
  set_mb_row(cm, xd, mb_row, size_in_mbs);
  set_mb_col(cm, xd, mb_col, size_in_mbs);

  xd->dst.y_buffer = cm->yv12_fb[fb_idx].y_buffer + y_offset;
  xd->dst.u_buffer = cm->yv12_fb[fb_idx].u_buffer + uv_offset;
  xd->dst.v_buffer = cm->yv12_fb[fb_idx].v_buffer + uv_offset;
}
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-03-14 20:31:54 +01:00
|
|
|
/* Sets up the reference prediction block(s) for the current block.
 *
 * Intra blocks are left alone. For an inter block the first reference's
 * scale factors are copied into slot 0 and xd->pre is set up; when a second
 * reference is in use xd->second_pre is set up too. Corruption flags of the
 * reference frame buffers are OR-ed into xd->corrupted.
 *
 *   pbi         decoder instance; pbi->mb.mode_info_context must already
 *               hold the decoded mode info for this block
 *   block_size  not used by this function
 *   mb_row      macroblock row of the block
 *   mb_col      macroblock column of the block
 */
static void set_refs(VP9D_COMP *pbi, int block_size, int mb_row, int mb_col) {
  VP9_COMMON *const cm = &pbi->common;
  MACROBLOCKD *const xd = &pbi->mb;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  int ref_slot, fb_idx;

  if (mbmi->ref_frame <= INTRA_FRAME) {
    return;
  }

  // First reference: pick its frame buffer and scale factors.
  ref_slot = mbmi->ref_frame - 1;
  fb_idx = cm->active_ref_idx[ref_slot];
  xd->scale_factor[0] = cm->active_ref_scale[ref_slot];
  xd->scale_factor_uv[0] = cm->active_ref_scale[ref_slot];
  setup_pred_block(&xd->pre, &cm->yv12_fb[fb_idx], mb_row, mb_col,
                   &xd->scale_factor[0], &xd->scale_factor_uv[0]);

  // Errors in a reference frame carry over to this block.
  xd->corrupted |= cm->yv12_fb[fb_idx].corrupted;

  if (mbmi->second_ref_frame <= INTRA_FRAME) {
    return;
  }

  // Second reference.
  // NOTE(review): unlike slot 0, scale_factor[1] / scale_factor_uv[1] are
  // not assigned in this function; presumably they are set elsewhere --
  // confirm.
  fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1];
  setup_pred_block(&xd->second_pre, &cm->yv12_fb[fb_idx],
                   mb_row, mb_col,
                   &xd->scale_factor[1], &xd->scale_factor_uv[1]);

  xd->corrupted |= cm->yv12_fb[fb_idx].corrupted;
}
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
/* Decode a row of Superblocks (2x2 region of MBs) */
|
|
|
|
static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc,
|
|
|
|
int mb_row, MACROBLOCKD *xd,
|
|
|
|
BOOL_DECODER* const bc) {
|
|
|
|
int mb_col;
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
// For a SB there are 2 left contexts, each pertaining to a MB row within
|
|
|
|
vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
|
2012-07-14 00:21:29 +02:00
|
|
|
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independendly encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Resuls: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
for (mb_col = pc->cur_tile_mb_col_start;
|
|
|
|
mb_col < pc->cur_tile_mb_col_end; mb_col += 4) {
|
2013-01-06 03:20:25 +01:00
|
|
|
if (vp9_read(bc, pc->sb64_coded)) {
|
2013-02-20 19:16:24 +01:00
|
|
|
#ifdef DEC_DEBUG
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
dec_debug = (pc->current_video_frame == 11 && pc->show_frame &&
|
|
|
|
mb_row == 8 && mb_col == 0);
|
2013-02-20 19:16:24 +01:00
|
|
|
if (dec_debug)
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
printf("Debug Decode SB64\n");
|
2013-02-20 19:16:24 +01:00
|
|
|
#endif
|
2013-01-06 03:20:25 +01:00
|
|
|
set_offsets(pbi, 64, mb_row, mb_col);
|
2012-10-30 22:51:31 +01:00
|
|
|
vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc);
|
2013-01-06 03:20:25 +01:00
|
|
|
set_refs(pbi, 64, mb_row, mb_col);
|
|
|
|
decode_superblock64(pbi, xd, mb_row, mb_col, bc);
|
|
|
|
xd->corrupted |= bool_error(bc);
|
2013-01-10 02:21:28 +01:00
|
|
|
} else {
|
2013-01-06 03:20:25 +01:00
|
|
|
int j;
|
2012-08-06 19:51:20 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
for (j = 0; j < 4; j++) {
|
|
|
|
const int x_idx_sb = (j & 1) << 1, y_idx_sb = j & 2;
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
if (mb_row + y_idx_sb >= pc->mb_rows ||
|
|
|
|
mb_col + x_idx_sb >= pc->mb_cols) {
|
|
|
|
// MB lies outside frame, skip on to next
|
|
|
|
continue;
|
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
xd->sb_index = j;
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
if (vp9_read(bc, pc->sb32_coded)) {
|
2013-02-20 19:16:24 +01:00
|
|
|
#ifdef DEC_DEBUG
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
dec_debug = (pc->current_video_frame == 11 && pc->show_frame &&
|
|
|
|
mb_row + y_idx_sb == 8 && mb_col + x_idx_sb == 0);
|
|
|
|
if (dec_debug)
|
|
|
|
printf("Debug Decode SB32\n");
|
2013-02-20 19:16:24 +01:00
|
|
|
#endif
|
2013-01-06 03:20:25 +01:00
|
|
|
set_offsets(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb);
|
|
|
|
vp9_decode_mb_mode_mv(pbi,
|
|
|
|
xd, mb_row + y_idx_sb, mb_col + x_idx_sb, bc);
|
|
|
|
set_refs(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb);
|
|
|
|
decode_superblock32(pbi,
|
|
|
|
xd, mb_row + y_idx_sb, mb_col + x_idx_sb, bc);
|
|
|
|
xd->corrupted |= bool_error(bc);
|
2013-01-08 19:29:22 +01:00
|
|
|
} else {
|
2013-01-06 03:20:25 +01:00
|
|
|
int i;
|
|
|
|
|
|
|
|
// Process the 4 MBs within the SB in the order:
|
|
|
|
// top-left, top-right, bottom-left, bottom-right
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
const int x_idx = x_idx_sb + (i & 1), y_idx = y_idx_sb + (i >> 1);
|
|
|
|
|
|
|
|
if (mb_row + y_idx >= pc->mb_rows ||
|
|
|
|
mb_col + x_idx >= pc->mb_cols) {
|
|
|
|
// MB lies outside frame, skip on to next
|
|
|
|
continue;
|
|
|
|
}
|
2013-02-20 19:16:24 +01:00
|
|
|
#ifdef DEC_DEBUG
|
Implicit weighted prediction experiment
Adds an experiment to use a weighted prediction of two INTER
predictors, where the weight is one of (1/4, 3/4), (3/8, 5/8),
(1/2, 1/2), (5/8, 3/8) or (3/4, 1/4), and is chosen implicitly
based on consistency of the predictors to the already
reconstructed pixels to the top and left of the current macroblock
or superblock.
Currently the weighting is not applied to SPLITMV modes, which
default to the usual (1/2, 1/2) weighting. However the code is in
place controlled by a macro. The same weighting is used for Y and
UV components, where the weight is derived from analyzing the Y
component only.
Results (over compound inter-intra experiment)
derf: +0.18%
yt: +0.34%
hd: +0.49%
stdhd: +0.23%
The experiment suggests bigger benefit for explicitly signaled weights.
Change-Id: I5438539ff4485c5752874cd1eb078ff14bf5235a
2013-03-12 22:21:08 +01:00
|
|
|
dec_debug = (pc->current_video_frame == 11 && pc->show_frame &&
|
|
|
|
mb_row + y_idx == 8 && mb_col + x_idx == 0);
|
|
|
|
if (dec_debug)
|
|
|
|
printf("Debug Decode MB\n");
|
2013-02-20 19:16:24 +01:00
|
|
|
#endif
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
set_offsets(pbi, 16, mb_row + y_idx, mb_col + x_idx);
|
|
|
|
xd->mb_index = i;
|
|
|
|
vp9_decode_mb_mode_mv(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc);
|
|
|
|
set_refs(pbi, 16, mb_row + y_idx, mb_col + x_idx);
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independendly encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Resuls: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
decode_macroblock(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
/* check if the boolean decoder has suffered an error */
|
|
|
|
xd->corrupted |= bool_error(bc);
|
|
|
|
}
|
|
|
|
}
|
2012-08-20 23:43:34 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
|
|
|
|
2011-08-05 18:27:25 +02:00
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
static void setup_token_decoder(VP9D_COMP *pbi,
|
2012-10-18 01:47:38 +02:00
|
|
|
const unsigned char *cx_data,
|
|
|
|
BOOL_DECODER* const bool_decoder) {
|
2013-03-12 01:02:27 +01:00
|
|
|
VP9_COMMON *pc = &pbi->common;
|
2012-07-14 00:21:29 +02:00
|
|
|
const unsigned char *user_data_end = pbi->Source + pbi->source_sz;
|
2013-03-12 01:02:27 +01:00
|
|
|
const unsigned char *partition = cx_data;
|
|
|
|
ptrdiff_t bytes_left = user_data_end - partition;
|
|
|
|
ptrdiff_t partition_size = bytes_left;
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-03-12 01:02:27 +01:00
|
|
|
// Validate the calculated partition length. If the buffer
|
|
|
|
// described by the partition can't be fully read, then restrict
|
|
|
|
// it to the portion that can be (for EC mode) or throw an error.
|
2012-07-14 00:21:29 +02:00
|
|
|
if (!read_is_valid(partition, partition_size, user_data_end)) {
|
|
|
|
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
|
|
|
"Truncated packet or corrupt partition "
|
|
|
|
"%d length", 1);
|
|
|
|
}
|
|
|
|
|
2012-11-05 23:22:59 +01:00
|
|
|
if (vp9_start_decode(bool_decoder,
|
|
|
|
partition, (unsigned int)partition_size))
|
2012-07-14 00:21:29 +02:00
|
|
|
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
|
|
|
|
"Failed to allocate bool decoder %d", 1);
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
/* Prepare the common and per-macroblock decoder state for a new frame. */
static void init_frame(VP9D_COMP *pbi) {
  VP9_COMMON *const cm = &pbi->common;
  MACROBLOCKD *const mbd = &pbi->mb;

  if (cm->frame_type == KEY_FRAME) {
    vp9_setup_past_independence(cm, mbd);
    // A key frame implicitly refreshes every reference buffer.
    pbi->refresh_frame_flags = (1 << NUM_REF_FRAMES) - 1;
  } else {
    if (cm->error_resilient_mode)
      vp9_setup_past_independence(cm, mbd);
  }

  if (cm->frame_type != KEY_FRAME) {
    if (cm->use_bilinear_mc_filter)
      cm->mcomp_filter_type = BILINEAR;
    else
      cm->mcomp_filter_type = EIGHTTAP;

    // Install the interpolation filters matching the selected type.
    vp9_setup_interp_filters(mbd, cm->mcomp_filter_type, cm);
  }

  // Frame-level values mirrored into the macroblock descriptor.
  mbd->frame_type = cm->frame_type;
  mbd->mode_info_stride = cm->mode_info_stride;
  mbd->fullpixel_mask = cm->full_pixel ? 0xfffffff8 : 0xffffffff;
  mbd->corrupted = 0;

  // Point the mode-info cursors at the start of the current and previous
  // frame's mode info.
  mbd->prev_mode_info_context = cm->prev_mi;
  mbd->mode_info_context = cm->mi;
  mbd->mode_info_context->mbmi.mode = DC_PRED;
}
|
|
|
|
|
2013-02-20 19:16:24 +01:00
|
|
|
#if CONFIG_CODE_NONZEROCOUNT
|
|
|
|
static void read_nzc_probs_common(VP9_COMMON *cm,
|
|
|
|
BOOL_DECODER* const bc,
|
|
|
|
int block_size) {
|
|
|
|
int c, r, b, t;
|
|
|
|
int tokens, nodes;
|
|
|
|
vp9_prob *nzc_probs;
|
|
|
|
vp9_prob upd;
|
|
|
|
|
|
|
|
if (!vp9_read_bit(bc)) return;
|
|
|
|
|
|
|
|
if (block_size == 32) {
|
|
|
|
tokens = NZC32X32_TOKENS;
|
|
|
|
nzc_probs = cm->fc.nzc_probs_32x32[0][0][0];
|
|
|
|
upd = NZC_UPDATE_PROB_32X32;
|
|
|
|
} else if (block_size == 16) {
|
|
|
|
tokens = NZC16X16_TOKENS;
|
|
|
|
nzc_probs = cm->fc.nzc_probs_16x16[0][0][0];
|
|
|
|
upd = NZC_UPDATE_PROB_16X16;
|
|
|
|
} else if (block_size == 8) {
|
|
|
|
tokens = NZC8X8_TOKENS;
|
|
|
|
nzc_probs = cm->fc.nzc_probs_8x8[0][0][0];
|
|
|
|
upd = NZC_UPDATE_PROB_8X8;
|
|
|
|
} else {
|
|
|
|
tokens = NZC4X4_TOKENS;
|
|
|
|
nzc_probs = cm->fc.nzc_probs_4x4[0][0][0];
|
|
|
|
upd = NZC_UPDATE_PROB_4X4;
|
|
|
|
}
|
|
|
|
nodes = tokens - 1;
|
|
|
|
for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
|
|
|
|
for (r = 0; r < REF_TYPES; ++r) {
|
|
|
|
for (b = 0; b < BLOCK_TYPES; ++b) {
|
|
|
|
int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b;
|
|
|
|
int offset_nodes = offset * nodes;
|
|
|
|
for (t = 0; t < nodes; ++t) {
|
|
|
|
vp9_prob *p = &nzc_probs[offset_nodes + t];
|
|
|
|
if (vp9_read(bc, upd)) {
|
|
|
|
*p = read_prob_diff_update(bc, *p);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-03-08 05:56:34 +01:00
|
|
|
/* Read updates to the nonzero-count extra-bit ("pcat") probabilities. */
static void read_nzc_pcat_probs(VP9_COMMON *cm, BOOL_DECODER* const bc) {
  const vp9_prob update_prob = NZC_UPDATE_PROB_PCAT;
  int ctx, tok, bit;

  // Nothing to read unless the update flag is set.
  if (!vp9_read_bit(bc))
    return;

  for (ctx = 0; ctx < MAX_NZC_CONTEXTS; ++ctx) {
    for (tok = 0; tok < NZC_TOKENS_EXTRA; ++tok) {
      // Number of extra bits carried by this token.
      const int extra_bits = vp9_extranzcbits[tok + NZC_TOKENS_NOEXTRA];
      vp9_prob *const row = cm->fc.nzc_pcat_probs[ctx][tok];

      for (bit = 0; bit < extra_bits; ++bit) {
        if (vp9_read(bc, update_prob))
          row[bit] = read_prob_diff_update(bc, row[bit]);
      }
    }
  }
}
|
|
|
|
|
2013-02-20 19:16:24 +01:00
|
|
|
static void read_nzc_probs(VP9_COMMON *cm,
|
|
|
|
BOOL_DECODER* const bc) {
|
|
|
|
read_nzc_probs_common(cm, bc, 4);
|
|
|
|
if (cm->txfm_mode != ONLY_4X4)
|
|
|
|
read_nzc_probs_common(cm, bc, 8);
|
|
|
|
if (cm->txfm_mode > ALLOW_8X8)
|
|
|
|
read_nzc_probs_common(cm, bc, 16);
|
|
|
|
if (cm->txfm_mode > ALLOW_16X16)
|
|
|
|
read_nzc_probs_common(cm, bc, 32);
|
2013-03-08 05:56:34 +01:00
|
|
|
#ifdef NZC_PCAT_UPDATE
|
|
|
|
read_nzc_pcat_probs(cm, bc);
|
|
|
|
#endif
|
2013-02-20 19:16:24 +01:00
|
|
|
}
|
|
|
|
#endif // CONFIG_CODE_NONZEROCOUNT
|
|
|
|
|
2012-12-08 01:09:59 +01:00
|
|
|
static void read_coef_probs_common(BOOL_DECODER* const bc,
|
|
|
|
vp9_coeff_probs *coef_probs,
|
|
|
|
int block_types) {
|
2013-03-13 19:03:17 +01:00
|
|
|
#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
|
|
|
|
const int entropy_nodes_update = UNCONSTRAINED_UPDATE_NODES;
|
|
|
|
#else
|
|
|
|
const int entropy_nodes_update = ENTROPY_NODES;
|
|
|
|
#endif
|
|
|
|
|
2013-02-19 22:36:38 +01:00
|
|
|
int i, j, k, l, m;
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
if (vp9_read_bit(bc)) {
|
2012-12-08 01:09:59 +01:00
|
|
|
for (i = 0; i < block_types; i++) {
|
2013-02-19 22:36:38 +01:00
|
|
|
for (j = 0; j < REF_TYPES; j++) {
|
|
|
|
for (k = 0; k < COEF_BANDS; k++) {
|
|
|
|
for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
|
|
|
|
if (l >= 3 && k == 0)
|
|
|
|
continue;
|
2013-03-13 19:03:17 +01:00
|
|
|
for (m = CONFIG_CODE_NONZEROCOUNT; m < entropy_nodes_update; m++) {
|
2013-02-19 22:36:38 +01:00
|
|
|
vp9_prob *const p = coef_probs[i][j][k][l] + m;
|
|
|
|
|
2013-03-13 19:03:17 +01:00
|
|
|
if (vp9_read(bc, vp9_coef_update_prob[m])) {
|
2013-02-19 22:36:38 +01:00
|
|
|
*p = read_prob_diff_update(bc, *p);
|
2013-03-13 19:03:17 +01:00
|
|
|
#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
|
2013-03-26 15:29:24 +01:00
|
|
|
if (m == UNCONSTRAINED_NODES - 1)
|
2013-03-13 19:03:17 +01:00
|
|
|
vp9_get_model_distribution(*p, coef_probs[i][j][k][l], i, j);
|
|
|
|
#endif
|
2013-02-19 22:36:38 +01:00
|
|
|
}
|
2012-09-10 07:42:35 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-10-20 00:35:36 +02:00
|
|
|
}
|
|
|
|
}
|
2012-09-10 07:42:35 +02:00
|
|
|
}
|
2012-10-20 00:35:36 +02:00
|
|
|
}
|
2012-09-10 07:42:35 +02:00
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
/* Read coefficient probability updates for every transform size that the
 * frame's transform mode can use. The 4x4 table is always read.
 */
static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) {
  VP9_COMMON *const cm = &pbi->common;
  const int tx_mode = cm->txfm_mode;

  read_coef_probs_common(bc, cm->fc.coef_probs_4x4, BLOCK_TYPES);

  if (tx_mode != ONLY_4X4) {
    read_coef_probs_common(bc, cm->fc.coef_probs_8x8, BLOCK_TYPES);
  }
  if (tx_mode > ALLOW_8X8) {
    read_coef_probs_common(bc, cm->fc.coef_probs_16x16, BLOCK_TYPES);
  }
  if (tx_mode > ALLOW_16X16) {
    read_coef_probs_common(bc, cm->fc.coef_probs_32x32, BLOCK_TYPES);
  }
}
|
|
|
|
|
Spatial resampling of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
/* Recompute the macroblock dimensions and mode-info layout from the current
 * frame width and height, and clear the mode-info array.
 */
static void update_frame_size(VP9D_COMP *pbi) {
  VP9_COMMON *const common = &pbi->common;

  // Size in whole macroblocks: internal buffers are always multiples of 16
  // pixels, so round up to the next multiple of 16 and divide by 16.
  common->mb_rows = (common->height + 15) >> 4;
  common->mb_cols = (common->width + 15) >> 4;
  common->MBs = common->mb_rows * common->mb_cols;

  // The mode-info array has one more column and one more row than the
  // macroblock grid.
  common->mode_info_stride = common->mb_cols + 1;
  memset(common->mip, 0,
         (common->mb_cols + 1) * (common->mb_rows + 1) * sizeof(MODE_INFO));
  vp9_update_mode_info_border(common, common->mip);

  // The visible mode info starts one row and one column into the array.
  common->mi = common->mip + common->mode_info_stride + 1;
  common->prev_mi = common->prev_mip + common->mode_info_stride + 1;
  vp9_update_mode_info_in_image(common, common->mi);
}
|
|
|
|
|
2013-03-26 19:04:25 +01:00
|
|
|
static void setup_segmentation(VP9_COMMON *pc, MACROBLOCKD *xd,
|
|
|
|
BOOL_DECODER *header_bc) {
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
// Is segmentation enabled
|
|
|
|
xd->segmentation_enabled = vp9_read_bit(header_bc);
|
|
|
|
|
|
|
|
if (xd->segmentation_enabled) {
|
|
|
|
// Read whether or not the segmentation map is being explicitly updated
|
|
|
|
// this frame.
|
|
|
|
xd->update_mb_segmentation_map = vp9_read_bit(header_bc);
|
|
|
|
|
|
|
|
// If so what method will be used.
|
|
|
|
if (xd->update_mb_segmentation_map) {
|
|
|
|
// Which macro block level features are enabled. Read the probs used to
|
|
|
|
// decode the segment id for each macro block.
|
|
|
|
for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) {
|
|
|
|
xd->mb_segment_tree_probs[i] = vp9_read_bit(header_bc) ?
|
|
|
|
(vp9_prob)vp9_read_literal(header_bc, 8) : 255;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read the prediction probs needed to decode the segment id
|
|
|
|
pc->temporal_update = vp9_read_bit(header_bc);
|
|
|
|
for (i = 0; i < PREDICTION_PROBS; i++) {
|
|
|
|
if (pc->temporal_update) {
|
|
|
|
pc->segment_pred_probs[i] = vp9_read_bit(header_bc) ?
|
|
|
|
(vp9_prob)vp9_read_literal(header_bc, 8) : 255;
|
|
|
|
} else {
|
|
|
|
pc->segment_pred_probs[i] = 255;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pc->temporal_update) {
|
|
|
|
int count[4];
|
|
|
|
const vp9_prob *p = xd->mb_segment_tree_probs;
|
|
|
|
vp9_prob *p_mod = xd->mb_segment_mispred_tree_probs;
|
|
|
|
|
|
|
|
count[0] = p[0] * p[1];
|
|
|
|
count[1] = p[0] * (256 - p[1]);
|
|
|
|
count[2] = (256 - p[0]) * p[2];
|
|
|
|
count[3] = (256 - p[0]) * (256 - p[2]);
|
|
|
|
|
|
|
|
p_mod[0] = get_binary_prob(count[1], count[2] + count[3]);
|
|
|
|
p_mod[1] = get_binary_prob(count[0], count[2] + count[3]);
|
|
|
|
p_mod[2] = get_binary_prob(count[0] + count[1], count[3]);
|
|
|
|
p_mod[3] = get_binary_prob(count[0] + count[1], count[2]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Is the segment data being updated
|
|
|
|
xd->update_mb_segmentation_data = vp9_read_bit(header_bc);
|
|
|
|
|
|
|
|
if (xd->update_mb_segmentation_data) {
|
|
|
|
int data;
|
|
|
|
|
|
|
|
xd->mb_segment_abs_delta = vp9_read_bit(header_bc);
|
|
|
|
|
|
|
|
vp9_clearall_segfeatures(xd);
|
|
|
|
|
|
|
|
// For each segmentation...
|
|
|
|
for (i = 0; i < MAX_MB_SEGMENTS; i++) {
|
|
|
|
// For each of the segments features...
|
|
|
|
for (j = 0; j < SEG_LVL_MAX; j++) {
|
|
|
|
// Is the feature enabled
|
|
|
|
if (vp9_read_bit(header_bc)) {
|
|
|
|
// Update the feature data and mask
|
|
|
|
vp9_enable_segfeature(xd, i, j);
|
|
|
|
|
|
|
|
data = vp9_decode_unsigned_max(header_bc,
|
|
|
|
vp9_seg_feature_data_max(j));
|
|
|
|
|
|
|
|
// Is the segment data signed..
|
|
|
|
if (vp9_is_segfeature_signed(j)) {
|
|
|
|
if (vp9_read_bit(header_bc))
|
|
|
|
data = -data;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
data = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
vp9_set_segdata(xd, i, j, data);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void setup_loopfilter(VP9_COMMON *pc, MACROBLOCKD *xd,
|
|
|
|
BOOL_DECODER *header_bc) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(header_bc);
|
|
|
|
pc->filter_level = vp9_read_literal(header_bc, 6);
|
|
|
|
pc->sharpness_level = vp9_read_literal(header_bc, 3);
|
|
|
|
|
|
|
|
#if CONFIG_LOOP_DERING
|
|
|
|
if (vp9_read_bit(header_bc))
|
|
|
|
pc->dering_enabled = 1 + vp9_read_literal(header_bc, 4);
|
|
|
|
else
|
|
|
|
pc->dering_enabled = 0;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// Read in loop filter deltas applied at the MB level based on mode or ref
|
|
|
|
// frame.
|
|
|
|
xd->mode_ref_lf_delta_update = 0;
|
|
|
|
xd->mode_ref_lf_delta_enabled = vp9_read_bit(header_bc);
|
|
|
|
|
|
|
|
if (xd->mode_ref_lf_delta_enabled) {
|
|
|
|
// Do the deltas need to be updated
|
|
|
|
xd->mode_ref_lf_delta_update = vp9_read_bit(header_bc);
|
|
|
|
|
|
|
|
if (xd->mode_ref_lf_delta_update) {
|
|
|
|
// Send update
|
|
|
|
for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
|
|
|
|
if (vp9_read_bit(header_bc)) {
|
|
|
|
// sign = vp9_read_bit( &header_bc );
|
|
|
|
xd->ref_lf_deltas[i] = (signed char)vp9_read_literal(header_bc, 6);
|
|
|
|
|
|
|
|
if (vp9_read_bit(header_bc))
|
|
|
|
xd->ref_lf_deltas[i] = -xd->ref_lf_deltas[i]; // Apply sign
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Send update
|
|
|
|
for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
|
|
|
|
if (vp9_read_bit(header_bc)) {
|
|
|
|
// sign = vp9_read_bit( &header_bc );
|
|
|
|
xd->mode_lf_deltas[i] = (signed char)vp9_read_literal(header_bc, 6);
|
|
|
|
|
|
|
|
if (vp9_read_bit(header_bc))
|
|
|
|
xd->mode_lf_deltas[i] = -xd->mode_lf_deltas[i]; // Apply sign
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-11-15 21:19:07 +01:00
|
|
|
int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
|
2012-10-18 01:47:38 +02:00
|
|
|
BOOL_DECODER header_bc, residual_bc;
|
2012-10-31 01:53:32 +01:00
|
|
|
VP9_COMMON *const pc = &pbi->common;
|
2012-10-17 23:51:27 +02:00
|
|
|
MACROBLOCKD *const xd = &pbi->mb;
|
2013-03-14 20:31:54 +01:00
|
|
|
const uint8_t *data = (const uint8_t *)pbi->Source;
|
|
|
|
const uint8_t *data_end = data + pbi->source_sz;
|
2012-07-14 00:21:29 +02:00
|
|
|
ptrdiff_t first_partition_length_in_bytes = 0;
|
2013-03-26 19:04:25 +01:00
|
|
|
int mb_row, i, corrupt_tokens = 0;
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-01-15 15:43:35 +01:00
|
|
|
// printf("Decoding frame %d\n", pc->current_video_frame);
|
2012-07-14 00:21:29 +02:00
|
|
|
/* start with no corruption of current frame */
|
|
|
|
xd->corrupted = 0;
|
|
|
|
pc->yv12_fb[pc->new_fb_idx].corrupted = 0;
|
|
|
|
|
|
|
|
if (data_end - data < 3) {
|
2013-02-23 02:27:34 +01:00
|
|
|
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet");
|
2012-07-14 00:21:29 +02:00
|
|
|
} else {
|
2013-03-14 22:36:08 +01:00
|
|
|
int scaling_active;
|
2012-07-14 00:21:29 +02:00
|
|
|
pc->last_frame_type = pc->frame_type;
|
|
|
|
pc->frame_type = (FRAME_TYPE)(data[0] & 1);
|
|
|
|
pc->version = (data[0] >> 1) & 7;
|
|
|
|
pc->show_frame = (data[0] >> 4) & 1;
|
2013-03-14 22:36:08 +01:00
|
|
|
scaling_active = (data[0] >> 5) & 1;
|
2013-03-25 22:24:26 +01:00
|
|
|
first_partition_length_in_bytes = read_le16(data + 1);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-03-14 20:31:54 +01:00
|
|
|
if (!read_is_valid(data, first_partition_length_in_bytes, data_end))
|
2012-07-14 00:21:29 +02:00
|
|
|
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
|
|
|
"Truncated packet or corrupt partition 0 length");
|
|
|
|
|
|
|
|
data += 3;
|
|
|
|
|
2012-10-31 00:25:53 +01:00
|
|
|
vp9_setup_version(pc);
|
2011-07-20 23:21:24 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
if (pc->frame_type == KEY_FRAME) {
|
|
|
|
/* vet via sync code */
|
|
|
|
/* When error concealment is enabled we should only check the sync
|
|
|
|
* code if we have enough bits available
|
|
|
|
*/
|
|
|
|
if (data + 3 < data_end) {
|
|
|
|
if (data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a)
|
|
|
|
vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM,
|
|
|
|
"Invalid frame sync code");
|
|
|
|
}
|
2013-02-07 00:54:52 +01:00
|
|
|
data += 3;
|
|
|
|
}
|
|
|
|
{
|
2013-03-21 00:41:30 +01:00
|
|
|
const int width = pc->width;
|
|
|
|
const int height = pc->height;
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
/* If error concealment is enabled we should only parse the new size
|
|
|
|
* if we have enough data. Otherwise we will end up with the wrong
|
|
|
|
* size.
|
|
|
|
*/
|
2013-03-14 22:36:08 +01:00
|
|
|
if (scaling_active && data + 4 < data_end) {
|
|
|
|
pc->display_width = read_le16(data + 0);
|
|
|
|
pc->display_height = read_le16(data + 2);
|
|
|
|
data += 4;
|
|
|
|
}
|
|
|
|
if (data + 4 < data_end) {
|
2013-03-21 00:41:30 +01:00
|
|
|
pc->width = read_le16(data + 0);
|
|
|
|
pc->height = read_le16(data + 2);
|
2013-03-14 22:36:08 +01:00
|
|
|
data += 4;
|
|
|
|
}
|
|
|
|
if (!scaling_active) {
|
2013-03-21 00:41:30 +01:00
|
|
|
pc->display_width = pc->width;
|
|
|
|
pc->display_height = pc->height;
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
|
2013-03-21 00:41:30 +01:00
|
|
|
if (width != pc->width || height != pc->height) {
|
|
|
|
if (pc->width <= 0) {
|
|
|
|
pc->width = width;
|
2012-07-14 00:21:29 +02:00
|
|
|
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
|
|
|
"Invalid frame width");
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-03-21 00:41:30 +01:00
|
|
|
if (pc->height <= 0) {
|
|
|
|
pc->height = height;
|
2012-07-14 00:21:29 +02:00
|
|
|
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
|
|
|
"Invalid frame height");
|
|
|
|
}
|
2010-12-16 16:46:31 +01:00
|
|
|
|
Spatial resampling of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
if (!pbi->initial_width || !pbi->initial_height) {
|
2013-03-21 00:41:30 +01:00
|
|
|
if (vp9_alloc_frame_buffers(pc, pc->width, pc->height))
|
Spatial resampling of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
|
|
|
|
"Failed to allocate frame buffers");
|
2013-03-21 00:41:30 +01:00
|
|
|
pbi->initial_width = pc->width;
|
|
|
|
pbi->initial_height = pc->height;
|
Spatial resampling of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
}
|
|
|
|
|
2013-03-21 00:41:30 +01:00
|
|
|
if (pc->width > pbi->initial_width) {
|
Spatial resampling of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
|
|
|
"Frame width too large");
|
|
|
|
}
|
|
|
|
|
2013-03-21 00:41:30 +01:00
|
|
|
if (pc->height > pbi->initial_height) {
|
Spatial resampling of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
|
|
|
"Frame height too large");
|
|
|
|
}
|
|
|
|
|
|
|
|
update_frame_size(pbi);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2011-05-02 15:30:51 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME) ||
|
2013-03-21 00:41:30 +01:00
|
|
|
pc->width == 0 || pc->height == 0) {
|
2012-07-14 00:21:29 +02:00
|
|
|
return -1;
|
|
|
|
}
|
2011-05-02 15:30:51 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
init_frame(pbi);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
Spatial resampling of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
/* Reset the frame pointers to the current frame size */
|
|
|
|
vp8_yv12_realloc_frame_buffer(&pc->yv12_fb[pc->new_fb_idx],
|
2013-03-21 00:41:30 +01:00
|
|
|
pc->width, pc->height,
|
Spatial resampling of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
VP9BORDERINPIXELS);
|
|
|
|
|
2012-11-05 23:22:59 +01:00
|
|
|
if (vp9_start_decode(&header_bc, data,
|
|
|
|
(unsigned int)first_partition_length_in_bytes))
|
2012-07-14 00:21:29 +02:00
|
|
|
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
|
|
|
|
"Failed to allocate bool decoder 0");
|
2013-02-07 00:54:52 +01:00
|
|
|
pc->clr_type = (YUV_TYPE)vp9_read_bit(&header_bc);
|
|
|
|
pc->clamp_type = (CLAMP_TYPE)vp9_read_bit(&header_bc);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-01-15 15:43:35 +01:00
|
|
|
pc->error_resilient_mode = vp9_read_bit(&header_bc);
|
2011-11-11 11:10:06 +01:00
|
|
|
|
2013-03-26 19:04:25 +01:00
|
|
|
setup_segmentation(pc, xd, &header_bc);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
// Read common prediction model status flag probability updates for the
|
|
|
|
// reference frame
|
|
|
|
if (pc->frame_type == KEY_FRAME) {
|
|
|
|
// Set the prediction probabilities to defaults
|
|
|
|
pc->ref_pred_probs[0] = 120;
|
|
|
|
pc->ref_pred_probs[1] = 80;
|
|
|
|
pc->ref_pred_probs[2] = 40;
|
|
|
|
} else {
|
|
|
|
for (i = 0; i < PREDICTION_PROBS; i++) {
|
2012-10-31 22:40:53 +01:00
|
|
|
if (vp9_read_bit(&header_bc))
|
|
|
|
pc->ref_pred_probs[i] = (vp9_prob)vp9_read_literal(&header_bc, 8);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
}
|
2012-01-28 13:20:14 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
pc->sb64_coded = vp9_read_literal(&header_bc, 8);
|
|
|
|
pc->sb32_coded = vp9_read_literal(&header_bc, 8);
|
2013-02-12 06:14:46 +01:00
|
|
|
xd->lossless = vp9_read_bit(&header_bc);
|
|
|
|
if (xd->lossless) {
|
|
|
|
pc->txfm_mode = ONLY_4X4;
|
2013-02-20 16:27:35 +01:00
|
|
|
} else {
|
2013-03-14 20:31:54 +01:00
|
|
|
// Read the transform mode and, for TX_MODE_SELECT, its probabilities
|
2013-02-12 06:14:46 +01:00
|
|
|
pc->txfm_mode = vp9_read_literal(&header_bc, 2);
|
|
|
|
if (pc->txfm_mode == 3)
|
|
|
|
pc->txfm_mode += vp9_read_bit(&header_bc);
|
2013-03-14 20:31:54 +01:00
|
|
|
|
2013-02-12 06:14:46 +01:00
|
|
|
if (pc->txfm_mode == TX_MODE_SELECT) {
|
|
|
|
pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
|
|
|
|
pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
|
|
|
|
pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
|
|
|
|
}
|
2012-10-09 18:18:21 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-03-26 19:04:25 +01:00
|
|
|
setup_loopfilter(pc, xd, &header_bc);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2012-10-18 01:47:38 +02:00
|
|
|
// Dummy read for now
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_read_literal(&header_bc, 2);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
/* Read the default quantizers. */
|
|
|
|
{
|
2013-02-23 02:27:34 +01:00
|
|
|
int q_update = 0;
|
|
|
|
pc->base_qindex = vp9_read_literal(&header_bc, QINDEX_BITS);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2012-10-18 01:47:38 +02:00
|
|
|
/* AC 1st order Q = default */
|
|
|
|
pc->y1dc_delta_q = get_delta_q(&header_bc, pc->y1dc_delta_q, &q_update);
|
|
|
|
pc->uvdc_delta_q = get_delta_q(&header_bc, pc->uvdc_delta_q, &q_update);
|
|
|
|
pc->uvac_delta_q = get_delta_q(&header_bc, pc->uvac_delta_q, &q_update);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
if (q_update)
|
2012-10-31 00:16:28 +01:00
|
|
|
vp9_init_de_quantizer(pbi);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
/* MB level dequantizer setup */
|
|
|
|
mb_init_dequantizer(pbi, &pbi->mb);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Determine if the golden frame or ARF buffer should be updated and how.
|
|
|
|
* For all non key frames the GF and ARF refresh flags and sign bias
|
|
|
|
* flags must be set explicitly.
|
|
|
|
*/
|
2013-01-16 21:19:42 +01:00
|
|
|
if (pc->frame_type == KEY_FRAME) {
|
|
|
|
pc->active_ref_idx[0] = pc->new_fb_idx;
|
|
|
|
pc->active_ref_idx[1] = pc->new_fb_idx;
|
|
|
|
pc->active_ref_idx[2] = pc->new_fb_idx;
|
|
|
|
} else {
|
2012-07-14 00:21:29 +02:00
|
|
|
/* Should the GF or ARF be updated from the current frame */
|
2013-01-15 22:49:44 +01:00
|
|
|
pbi->refresh_frame_flags = vp9_read_literal(&header_bc, NUM_REF_FRAMES);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-01-16 21:19:42 +01:00
|
|
|
/* Select active reference frames */
|
|
|
|
for (i = 0; i < 3; i++) {
|
|
|
|
int ref_frame_num = vp9_read_literal(&header_bc, NUM_REF_FRAMES_LG2);
|
|
|
|
|
|
|
|
pc->active_ref_idx[i] = pc->ref_frame_map[ref_frame_num];
|
|
|
|
}
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp9_read_bit(&header_bc);
|
|
|
|
pc->ref_frame_sign_bias[ALTREF_FRAME] = vp9_read_bit(&header_bc);
|
2012-02-16 18:29:54 +01:00
|
|
|
|
2013-03-14 20:31:54 +01:00
|
|
|
// Is high precision mv allowed
|
2012-10-31 22:40:53 +01:00
|
|
|
xd->allow_high_precision_mv = (unsigned char)vp9_read_bit(&header_bc);
|
2013-03-14 20:31:54 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Read the type of subpel filter to use
|
2013-03-14 20:31:54 +01:00
|
|
|
pc->mcomp_filter_type = vp9_read_bit(&header_bc) ? SWITCHABLE :
|
|
|
|
vp9_read_literal(&header_bc, 2);
|
|
|
|
|
2012-11-07 15:50:25 +01:00
|
|
|
#if CONFIG_COMP_INTERINTRA_PRED
|
|
|
|
pc->use_interintra = vp9_read_bit(&header_bc);
|
|
|
|
#endif
|
2012-07-18 22:43:01 +02:00
|
|
|
/* To enable choice of different interpolation filters */
|
2012-10-31 00:25:53 +01:00
|
|
|
vp9_setup_interp_filters(xd, pc->mcomp_filter_type, pc);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
|
2013-01-25 20:30:28 +01:00
|
|
|
if (!pc->error_resilient_mode) {
|
|
|
|
pc->refresh_entropy_probs = vp9_read_bit(&header_bc);
|
|
|
|
pc->frame_parallel_decoding_mode = vp9_read_bit(&header_bc);
|
|
|
|
} else {
|
|
|
|
pc->refresh_entropy_probs = 0;
|
|
|
|
pc->frame_parallel_decoding_mode = 1;
|
|
|
|
}
|
2013-01-16 00:57:11 +01:00
|
|
|
pc->frame_context_idx = vp9_read_literal(&header_bc, NUM_FRAME_CONTEXTS_LG2);
|
|
|
|
vpx_memcpy(&pc->fc, &pc->frame_contexts[pc->frame_context_idx],
|
|
|
|
sizeof(pc->fc));
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2012-11-16 17:31:32 +01:00
|
|
|
// Read inter mode probability context updates
|
|
|
|
if (pc->frame_type != KEY_FRAME) {
|
|
|
|
int i, j;
|
|
|
|
for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
|
|
|
|
for (j = 0; j < 4; j++) {
|
|
|
|
if (vp9_read(&header_bc, 252)) {
|
|
|
|
pc->fc.vp9_mode_contexts[i][j] =
|
|
|
|
(vp9_prob)vp9_read_literal(&header_bc, 8);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-03-13 19:03:17 +01:00
|
|
|
#if CONFIG_MODELCOEFPROB && ADJUST_KF_COEF_PROBS
|
|
|
|
if (pc->frame_type == KEY_FRAME)
|
|
|
|
vp9_adjust_default_coef_probs(pc);
|
|
|
|
#endif
|
2012-11-16 17:31:32 +01:00
|
|
|
|
2012-12-10 13:38:48 +01:00
|
|
|
#if CONFIG_NEW_MVREF
|
|
|
|
// If Key frame reset mv ref id probabilities to defaults
|
2013-01-15 15:43:35 +01:00
|
|
|
if (pc->frame_type != KEY_FRAME) {
|
2012-12-10 13:38:48 +01:00
|
|
|
// Read any mv_ref index probability updates
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_REF_FRAMES; ++i) {
|
|
|
|
// Skip the dummy entry for intra ref frame.
|
|
|
|
if (i == INTRA_FRAME) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read any updates to probabilities
|
|
|
|
for (j = 0; j < MAX_MV_REF_CANDIDATES - 1; ++j) {
|
|
|
|
if (vp9_read(&header_bc, VP9_MVREF_UPDATE_PROB)) {
|
|
|
|
xd->mb_mv_ref_probs[i][j] =
|
|
|
|
(vp9_prob)vp9_read_literal(&header_bc, 8);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
if (0) {
|
|
|
|
FILE *z = fopen("decodestats.stt", "a");
|
2013-01-15 22:49:44 +01:00
|
|
|
fprintf(z, "%6d F:%d,R:%d,Q:%d\n",
|
2012-07-14 00:21:29 +02:00
|
|
|
pc->current_video_frame,
|
|
|
|
pc->frame_type,
|
2013-01-15 22:49:44 +01:00
|
|
|
pbi->refresh_frame_flags,
|
2012-07-14 00:21:29 +02:00
|
|
|
pc->base_qindex);
|
|
|
|
fclose(z);
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-12-08 01:09:59 +01:00
|
|
|
vp9_copy(pbi->common.fc.pre_coef_probs_4x4,
|
|
|
|
pbi->common.fc.coef_probs_4x4);
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_copy(pbi->common.fc.pre_coef_probs_8x8,
|
2012-09-10 07:42:35 +02:00
|
|
|
pbi->common.fc.coef_probs_8x8);
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_copy(pbi->common.fc.pre_coef_probs_16x16,
|
2012-09-10 07:42:35 +02:00
|
|
|
pbi->common.fc.coef_probs_16x16);
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
vp9_copy(pbi->common.fc.pre_coef_probs_32x32,
|
|
|
|
pbi->common.fc.coef_probs_32x32);
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob);
|
2012-11-16 00:50:07 +01:00
|
|
|
vp9_copy(pbi->common.fc.pre_sb_ymode_prob, pbi->common.fc.sb_ymode_prob);
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_copy(pbi->common.fc.pre_uv_mode_prob, pbi->common.fc.uv_mode_prob);
|
|
|
|
vp9_copy(pbi->common.fc.pre_bmode_prob, pbi->common.fc.bmode_prob);
|
|
|
|
vp9_copy(pbi->common.fc.pre_i8x8_mode_prob, pbi->common.fc.i8x8_mode_prob);
|
|
|
|
vp9_copy(pbi->common.fc.pre_sub_mv_ref_prob, pbi->common.fc.sub_mv_ref_prob);
|
|
|
|
vp9_copy(pbi->common.fc.pre_mbsplit_prob, pbi->common.fc.mbsplit_prob);
|
2012-11-07 15:50:25 +01:00
|
|
|
#if CONFIG_COMP_INTERINTRA_PRED
|
|
|
|
pbi->common.fc.pre_interintra_prob = pbi->common.fc.interintra_prob;
|
|
|
|
#endif
|
2012-07-26 22:42:07 +02:00
|
|
|
pbi->common.fc.pre_nmvc = pbi->common.fc.nmvc;
|
2013-02-20 19:16:24 +01:00
|
|
|
#if CONFIG_CODE_NONZEROCOUNT
|
|
|
|
vp9_copy(pbi->common.fc.pre_nzc_probs_4x4,
|
|
|
|
pbi->common.fc.nzc_probs_4x4);
|
|
|
|
vp9_copy(pbi->common.fc.pre_nzc_probs_8x8,
|
|
|
|
pbi->common.fc.nzc_probs_8x8);
|
|
|
|
vp9_copy(pbi->common.fc.pre_nzc_probs_16x16,
|
|
|
|
pbi->common.fc.nzc_probs_16x16);
|
|
|
|
vp9_copy(pbi->common.fc.pre_nzc_probs_32x32,
|
|
|
|
pbi->common.fc.nzc_probs_32x32);
|
2013-03-08 05:56:34 +01:00
|
|
|
vp9_copy(pbi->common.fc.pre_nzc_pcat_probs,
|
|
|
|
pbi->common.fc.nzc_pcat_probs);
|
2013-02-20 19:16:24 +01:00
|
|
|
#endif
|
|
|
|
|
2012-12-08 01:09:59 +01:00
|
|
|
vp9_zero(pbi->common.fc.coef_counts_4x4);
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_zero(pbi->common.fc.coef_counts_8x8);
|
|
|
|
vp9_zero(pbi->common.fc.coef_counts_16x16);
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
vp9_zero(pbi->common.fc.coef_counts_32x32);
|
2013-03-27 00:46:09 +01:00
|
|
|
vp9_zero(pbi->common.fc.eob_branch_counts);
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_zero(pbi->common.fc.ymode_counts);
|
2012-11-16 00:50:07 +01:00
|
|
|
vp9_zero(pbi->common.fc.sb_ymode_counts);
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_zero(pbi->common.fc.uv_mode_counts);
|
|
|
|
vp9_zero(pbi->common.fc.bmode_counts);
|
|
|
|
vp9_zero(pbi->common.fc.i8x8_mode_counts);
|
|
|
|
vp9_zero(pbi->common.fc.sub_mv_ref_counts);
|
|
|
|
vp9_zero(pbi->common.fc.mbsplit_counts);
|
|
|
|
vp9_zero(pbi->common.fc.NMVcount);
|
|
|
|
vp9_zero(pbi->common.fc.mv_ref_ct);
|
2012-11-07 15:50:25 +01:00
|
|
|
#if CONFIG_COMP_INTERINTRA_PRED
|
|
|
|
vp9_zero(pbi->common.fc.interintra_counts);
|
|
|
|
#endif
|
2013-02-20 19:16:24 +01:00
|
|
|
#if CONFIG_CODE_NONZEROCOUNT
|
|
|
|
vp9_zero(pbi->common.fc.nzc_counts_4x4);
|
|
|
|
vp9_zero(pbi->common.fc.nzc_counts_8x8);
|
|
|
|
vp9_zero(pbi->common.fc.nzc_counts_16x16);
|
|
|
|
vp9_zero(pbi->common.fc.nzc_counts_32x32);
|
2013-03-08 05:56:34 +01:00
|
|
|
vp9_zero(pbi->common.fc.nzc_pcat_counts);
|
2013-02-20 19:16:24 +01:00
|
|
|
#endif
|
2012-09-10 07:42:35 +02:00
|
|
|
|
2012-10-18 01:47:38 +02:00
|
|
|
read_coef_probs(pbi, &header_bc);
|
2013-02-20 19:16:24 +01:00
|
|
|
#if CONFIG_CODE_NONZEROCOUNT
|
|
|
|
read_nzc_probs(&pbi->common, &header_bc);
|
|
|
|
#endif
|
2012-02-29 02:11:12 +01:00
|
|
|
|
2013-01-15 22:49:44 +01:00
|
|
|
/* Initialize xd pointers. Any reference should do for xd->pre, so use 0. */
|
|
|
|
vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->active_ref_idx[0]],
|
|
|
|
sizeof(YV12_BUFFER_CONFIG));
|
|
|
|
vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx],
|
|
|
|
sizeof(YV12_BUFFER_CONFIG));
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Create the segmentation map structure and set to 0
|
|
|
|
if (!pc->last_frame_seg_map)
|
|
|
|
CHECK_MEM_ERROR(pc->last_frame_seg_map,
|
|
|
|
vpx_calloc((pc->mb_rows * pc->mb_cols), 1));
|
2010-09-24 00:25:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/* set up the new frame for intra coded blocks */
|
2012-10-31 00:25:53 +01:00
|
|
|
vp9_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-10-31 00:25:53 +01:00
|
|
|
vp9_setup_block_dptrs(xd);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-10-31 00:25:53 +01:00
|
|
|
vp9_build_block_doffsets(xd);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/* clear out the coeff buffer */
|
|
|
|
vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/* Read the mb_no_coeff_skip flag */
|
2012-10-31 22:40:53 +01:00
|
|
|
pc->mb_no_coeff_skip = (int)vp9_read_bit(&header_bc);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-10-30 22:51:31 +01:00
|
|
|
vp9_decode_mode_mvs_init(pbi, &header_bc);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
/* tile info */
|
|
|
|
{
|
2013-03-14 20:31:54 +01:00
|
|
|
const uint8_t *data_ptr = data + first_partition_length_in_bytes;
|
2013-02-08 20:33:11 +01:00
|
|
|
int tile_row, tile_col, delta_log2_tiles;
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
|
2013-02-07 00:30:21 +01:00
|
|
|
vp9_get_tile_n_bits(pc, &pc->log2_tile_columns, &delta_log2_tiles);
|
|
|
|
while (delta_log2_tiles--) {
|
|
|
|
if (vp9_read_bit(&header_bc)) {
|
|
|
|
pc->log2_tile_columns++;
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
}
|
2013-02-08 20:33:11 +01:00
|
|
|
pc->log2_tile_rows = vp9_read_bit(&header_bc);
|
|
|
|
if (pc->log2_tile_rows)
|
|
|
|
pc->log2_tile_rows += vp9_read_bit(&header_bc);
|
2013-02-07 00:30:21 +01:00
|
|
|
pc->tile_columns = 1 << pc->log2_tile_columns;
|
2013-02-08 20:33:11 +01:00
|
|
|
pc->tile_rows = 1 << pc->log2_tile_rows;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
vpx_memset(pc->above_context, 0,
|
|
|
|
sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
|
|
|
|
|
|
|
|
if (pbi->oxcf.inv_tile_order) {
|
2013-02-08 20:33:11 +01:00
|
|
|
const int n_cols = pc->tile_columns;
|
2013-03-14 20:31:54 +01:00
|
|
|
const uint8_t *data_ptr2[4][1 << 6];
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
BOOL_DECODER UNINITIALIZED_IS_SAFE(bc_bak);
|
|
|
|
|
2013-02-08 20:33:11 +01:00
|
|
|
// pre-initialize the offsets, we're going to read in inverse order
|
|
|
|
data_ptr2[0][0] = data_ptr;
|
|
|
|
for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
|
|
|
|
if (tile_row) {
|
2013-03-14 20:31:54 +01:00
|
|
|
const int size = read_le32(data_ptr2[tile_row - 1][n_cols - 1]);
|
2013-02-08 20:33:11 +01:00
|
|
|
data_ptr2[tile_row - 1][n_cols - 1] += 4;
|
|
|
|
data_ptr2[tile_row][0] = data_ptr2[tile_row - 1][n_cols - 1] + size;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (tile_col = 1; tile_col < n_cols; tile_col++) {
|
2013-03-14 20:31:54 +01:00
|
|
|
const int size = read_le32(data_ptr2[tile_row][tile_col - 1]);
|
2013-02-08 20:33:11 +01:00
|
|
|
data_ptr2[tile_row][tile_col - 1] += 4;
|
|
|
|
data_ptr2[tile_row][tile_col] =
|
|
|
|
data_ptr2[tile_row][tile_col - 1] + size;
|
|
|
|
}
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
}
|
2013-02-08 20:33:11 +01:00
|
|
|
|
|
|
|
for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
|
|
|
|
vp9_get_tile_row_offsets(pc, tile_row);
|
|
|
|
for (tile_col = n_cols - 1; tile_col >= 0; tile_col--) {
|
|
|
|
vp9_get_tile_col_offsets(pc, tile_col);
|
|
|
|
setup_token_decoder(pbi, data_ptr2[tile_row][tile_col], &residual_bc);
|
|
|
|
|
|
|
|
/* Decode a row of superblocks */
|
|
|
|
for (mb_row = pc->cur_tile_mb_row_start;
|
|
|
|
mb_row < pc->cur_tile_mb_row_end; mb_row += 4) {
|
|
|
|
decode_sb_row(pbi, pc, mb_row, xd, &residual_bc);
|
|
|
|
}
|
|
|
|
if (tile_row == pc->tile_rows - 1 && tile_col == n_cols - 1)
|
|
|
|
bc_bak = residual_bc;
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
residual_bc = bc_bak;
|
|
|
|
} else {
|
2013-02-08 20:33:11 +01:00
|
|
|
for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
|
|
|
|
vp9_get_tile_row_offsets(pc, tile_row);
|
|
|
|
for (tile_col = 0; tile_col < pc->tile_columns; tile_col++) {
|
|
|
|
vp9_get_tile_col_offsets(pc, tile_col);
|
|
|
|
|
|
|
|
if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1)
|
|
|
|
setup_token_decoder(pbi, data_ptr + 4, &residual_bc);
|
|
|
|
else
|
|
|
|
setup_token_decoder(pbi, data_ptr, &residual_bc);
|
|
|
|
|
|
|
|
/* Decode a row of superblocks */
|
|
|
|
for (mb_row = pc->cur_tile_mb_row_start;
|
|
|
|
mb_row < pc->cur_tile_mb_row_end; mb_row += 4) {
|
|
|
|
decode_sb_row(pbi, pc, mb_row, xd, &residual_bc);
|
|
|
|
}
|
2013-02-23 02:27:34 +01:00
|
|
|
|
2013-02-08 20:33:11 +01:00
|
|
|
if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) {
|
2013-03-14 20:31:54 +01:00
|
|
|
int size = read_le32(data_ptr);
|
2013-02-08 20:33:11 +01:00
|
|
|
data_ptr += 4 + size;
|
|
|
|
}
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
corrupt_tokens |= xd->corrupted;
|
|
|
|
|
2013-02-20 21:34:31 +01:00
|
|
|
// keep track of the last coded dimensions
|
2013-03-21 00:41:30 +01:00
|
|
|
pc->last_width = pc->width;
|
|
|
|
pc->last_height = pc->height;
|
2013-02-20 21:34:31 +01:00
|
|
|
|
2013-03-14 20:31:54 +01:00
|
|
|
// Collect information about decoder corruption.
|
|
|
|
// 1. Check first boolean decoder for errors.
|
|
|
|
// 2. Check the macroblock information
|
|
|
|
pc->yv12_fb[pc->new_fb_idx].corrupted = bool_error(&header_bc) |
|
|
|
|
corrupt_tokens;
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
if (!pbi->decoded_key_frame) {
|
2013-03-14 20:31:54 +01:00
|
|
|
if (pc->frame_type == KEY_FRAME && !pc->yv12_fb[pc->new_fb_idx].corrupted)
|
2012-07-14 00:21:29 +02:00
|
|
|
pbi->decoded_key_frame = 1;
|
|
|
|
else
|
|
|
|
vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME,
|
|
|
|
"A stream must start with a complete key frame");
|
|
|
|
}
|
2010-12-16 16:46:31 +01:00
|
|
|
|
2013-03-14 20:31:54 +01:00
|
|
|
if (!pc->error_resilient_mode && !pc->frame_parallel_decoding_mode) {
|
2013-01-15 15:43:35 +01:00
|
|
|
vp9_adapt_coef_probs(pc);
|
2013-02-20 19:16:24 +01:00
|
|
|
#if CONFIG_CODE_NONZEROCOUNT
|
|
|
|
vp9_adapt_nzc_probs(pc);
|
|
|
|
#endif
|
|
|
|
}
|
2013-03-14 20:31:54 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
if (pc->frame_type != KEY_FRAME) {
|
2013-03-14 20:31:54 +01:00
|
|
|
if (!pc->error_resilient_mode && !pc->frame_parallel_decoding_mode) {
|
2013-01-15 15:43:35 +01:00
|
|
|
vp9_adapt_mode_probs(pc);
|
|
|
|
vp9_adapt_nmv_probs(pc, xd->allow_high_precision_mv);
|
|
|
|
vp9_adapt_mode_context(&pbi->common);
|
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
if (pc->refresh_entropy_probs) {
|
2013-01-16 00:57:11 +01:00
|
|
|
vpx_memcpy(&pc->frame_contexts[pc->frame_context_idx], &pc->fc,
|
|
|
|
sizeof(pc->fc));
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
|
|
|
#ifdef PACKET_TESTING
|
2012-07-14 00:21:29 +02:00
|
|
|
{
|
|
|
|
FILE *f = fopen("decompressor.VP8", "ab");
|
2012-10-18 01:47:38 +02:00
|
|
|
unsigned int size = residual_bc.pos + header_bc.pos + 8;
|
2012-07-14 00:21:29 +02:00
|
|
|
fwrite((void *) &size, 4, 1, f);
|
|
|
|
fwrite((void *) pbi->Source, size, 1, f);
|
|
|
|
fclose(f);
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
#endif
|
|
|
|
|
2012-11-15 21:19:07 +01:00
|
|
|
/* Find the end of the coded buffer */
|
|
|
|
while (residual_bc.count > CHAR_BIT
|
|
|
|
&& residual_bc.count < VP9_BD_VALUE_SIZE) {
|
|
|
|
residual_bc.count -= CHAR_BIT;
|
|
|
|
residual_bc.user_buffer--;
|
|
|
|
}
|
|
|
|
*p_data_end = residual_bc.user_buffer;
|
2012-07-14 00:21:29 +02:00
|
|
|
return 0;
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|