2010-05-18 17:58:33 +02:00
|
|
|
/*
|
2010-09-09 14:16:39 +02:00
|
|
|
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
2010-05-18 17:58:33 +02:00
|
|
|
*
|
2010-06-18 18:39:21 +02:00
|
|
|
* Use of this source code is governed by a BSD-style license
|
2010-06-04 22:19:40 +02:00
|
|
|
* that can be found in the LICENSE file in the root of the source
|
|
|
|
* tree. An additional intellectual property rights grant can be found
|
2010-06-18 18:39:21 +02:00
|
|
|
* in the file PATENTS. All contributing project authors may
|
2010-06-04 22:19:40 +02:00
|
|
|
* be found in the AUTHORS file in the root of the source tree.
|
2010-05-18 17:58:33 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_header.h"
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/encoder/vp9_encodemv.h"
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_entropymode.h"
|
2012-12-10 13:38:48 +01:00
|
|
|
#include "vp9/common/vp9_entropymv.h"
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_findnearmv.h"
|
2013-02-07 00:30:21 +01:00
|
|
|
#include "vp9/common/vp9_tile_common.h"
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/encoder/vp9_mcomp.h"
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_systemdependent.h"
|
2010-05-18 17:58:33 +02:00
|
|
|
#include <assert.h>
|
|
|
|
#include <stdio.h>
|
2011-06-10 11:11:15 +02:00
|
|
|
#include <limits.h>
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_pragmas.h"
|
2011-06-10 11:11:15 +02:00
|
|
|
#include "vpx/vpx_encoder.h"
|
2010-05-18 17:58:33 +02:00
|
|
|
#include "vpx_mem/vpx_mem.h"
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/encoder/vp9_bitstream.h"
|
|
|
|
#include "vp9/encoder/vp9_segmentation.h"
|
2011-08-16 22:21:21 +02:00
|
|
|
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_seg_common.h"
|
|
|
|
#include "vp9/common/vp9_pred_common.h"
|
|
|
|
#include "vp9/common/vp9_entropy.h"
|
|
|
|
#include "vp9/encoder/vp9_encodemv.h"
|
|
|
|
#include "vp9/common/vp9_entropymv.h"
|
|
|
|
#include "vp9/common/vp9_mvref_common.h"
|
Consistently use get_prob(), clip_prob() and newly added clip_pixel().
Add a function clip_pixel() to clip a pixel value to the [0,255] range
of allowed values, and use this where-ever appropriate (e.g. prediction,
reconstruction). Likewise, consistently use the recently added function
clip_prob(), which calculates a binary probability in the [1,255] range.
If possible, try to use get_prob() or its sister get_binary_prob() to
calculate binary probabilities, for consistency.
Since in some places, this means that binary probability calculations
are changed (we use {255,256}*count0/(total) in a range of places,
and all of these are now changed to use 256*count0+(total>>1)/total),
this changes the encoding result, so this patch warrants some extensive
testing.
Change-Id: Ibeeff8d886496839b8e0c0ace9ccc552351f7628
2012-12-10 21:09:07 +01:00
|
|
|
#include "vp9/common/vp9_treecoder.h"
|
2012-08-24 16:44:01 +02:00
|
|
|
|
2010-05-18 17:58:33 +02:00
|
|
|
#if defined(SECTIONBITS_OUTPUT)
|
|
|
|
unsigned __int64 Sectionbits[500];
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef ENTROPY_STATS
|
2012-10-09 22:19:15 +02:00
|
|
|
int intra_mode_stats[VP9_KF_BINTRAMODES]
|
|
|
|
[VP9_KF_BINTRAMODES]
|
|
|
|
[VP9_KF_BINTRAMODES];
|
2012-12-08 01:09:59 +01:00
|
|
|
vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES_4X4];
|
2013-02-15 21:09:05 +01:00
|
|
|
vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4_HYBRID];
|
2012-12-08 01:09:59 +01:00
|
|
|
vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES_8X8];
|
2013-02-15 21:09:05 +01:00
|
|
|
vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8_HYBRID];
|
2012-12-08 01:09:59 +01:00
|
|
|
vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES_16X16];
|
2013-02-15 21:09:05 +01:00
|
|
|
vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16_HYBRID];
|
2012-12-08 01:09:59 +01:00
|
|
|
vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32];
|
2012-05-15 01:21:01 +02:00
|
|
|
|
2010-05-18 17:58:33 +02:00
|
|
|
extern unsigned int active_section;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef MODE_STATS
|
|
|
|
int count_mb_seg[4] = { 0, 0, 0, 0 };
|
|
|
|
#endif
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
#define vp9_cost_upd ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)) >> 8)
|
|
|
|
#define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)))
|
2012-04-12 18:24:03 +02:00
|
|
|
|
|
|
|
#define SEARCH_NEWP
|
|
|
|
static int update_bits[255];
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
static void compute_update_table() {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < 255; i++)
|
2012-10-30 20:58:42 +01:00
|
|
|
update_bits[i] = vp9_count_term_subexp(i, SUBEXP_PARAM, 255);
|
2012-04-12 18:24:03 +02:00
|
|
|
}
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Remaps index i for a range of size n split by `modulus`.
//
// An index whose remainder modulo `modulus` equals modulus / 2 is mapped to
// i / modulus. Every other index is mapped to
//   first_free + i - (i + modulus - modulus / 2) / modulus
// where first_free = (n - 1 - modulus / 2) / modulus + 1.
static int split_index(int i, int n, int modulus) {
  const int half = modulus / 2;
  const int first_free = (n - 1 - half) / modulus + 1;

  if (i % modulus == half) {
    return i / modulus;
  }
  return first_free + i - (i + modulus - half) / modulus;
}
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
static int remap_prob(int v, int m) {
|
|
|
|
const int n = 256;
|
|
|
|
const int modulus = MODULUS_PARAM;
|
|
|
|
int i;
|
|
|
|
if ((m << 1) <= n)
|
2012-10-30 05:42:10 +01:00
|
|
|
i = vp9_recenter_nonneg(v, m) - 1;
|
2012-07-14 00:21:29 +02:00
|
|
|
else
|
2012-10-30 05:42:10 +01:00
|
|
|
i = vp9_recenter_nonneg(n - 1 - v, n - 1 - m) - 1;
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
i = split_index(i, n - 1, modulus);
|
|
|
|
return i;
|
2012-04-12 18:24:03 +02:00
|
|
|
}
|
2012-05-03 11:22:26 +02:00
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
// Writes the update of probability oldp to newp into bc: the pair is mapped
// to an index by remap_prob() and that index is coded with
// vp9_encode_term_subexp() using SUBEXP_PARAM and an upper limit of 255.
static void write_prob_diff_update(vp9_writer *const bc,
                                   vp9_prob newp, vp9_prob oldp) {
  vp9_encode_term_subexp(bc, remap_prob(newp, oldp), SUBEXP_PARAM, 255);
}
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
// Returns the cost of coding the change oldp -> newp: the update_bits[]
// entry for the remapped index, scaled by 256.
static int prob_diff_update_cost(vp9_prob newp, vp9_prob oldp) {
  return update_bits[remap_prob(newp, oldp)] * 256;
}
|
2012-03-29 00:19:45 +02:00
|
|
|
|
2010-05-18 17:58:33 +02:00
|
|
|
static void update_mode(
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_writer *const bc,
|
2012-07-14 00:21:29 +02:00
|
|
|
int n,
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_token tok [/* n */],
|
|
|
|
vp9_tree tree,
|
|
|
|
vp9_prob Pnew [/* n-1 */],
|
|
|
|
vp9_prob Pcur [/* n-1 */],
|
2012-07-14 00:21:29 +02:00
|
|
|
unsigned int bct [/* n-1 */] [2],
|
|
|
|
const unsigned int num_events[/* n */]
|
|
|
|
) {
|
|
|
|
unsigned int new_b = 0, old_b = 0;
|
|
|
|
int i = 0;
|
|
|
|
|
Consistently use get_prob(), clip_prob() and newly added clip_pixel().
Add a function clip_pixel() to clip a pixel value to the [0,255] range
of allowed values, and use this where-ever appropriate (e.g. prediction,
reconstruction). Likewise, consistently use the recently added function
clip_prob(), which calculates a binary probability in the [1,255] range.
If possible, try to use get_prob() or its sister get_binary_prob() to
calculate binary probabilities, for consistency.
Since in some places, this means that binary probability calculations
are changed (we use {255,256}*count0/(total) in a range of places,
and all of these are now changed to use 256*count0+(total>>1)/total),
this changes the encoding result, so this patch warrants some extensive
testing.
Change-Id: Ibeeff8d886496839b8e0c0ace9ccc552351f7628
2012-12-10 21:09:07 +01:00
|
|
|
vp9_tree_probs_from_distribution(n--, tok, tree,
|
|
|
|
Pnew, bct, num_events);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
do {
|
2012-10-31 22:40:53 +01:00
|
|
|
new_b += cost_branch(bct[i], Pnew[i]);
|
|
|
|
old_b += cost_branch(bct[i], Pcur[i]);
|
2012-07-14 00:21:29 +02:00
|
|
|
} while (++i < n);
|
|
|
|
|
|
|
|
if (new_b + (n << 8) < old_b) {
|
2010-05-18 17:58:33 +02:00
|
|
|
int i = 0;
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(bc, 1);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
do {
|
2012-10-31 22:40:53 +01:00
|
|
|
const vp9_prob p = Pnew[i];
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(bc, Pcur[i] = p ? p : 1, 8);
|
2012-07-14 00:21:29 +02:00
|
|
|
} while (++i < n);
|
|
|
|
} else
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(bc, 0);
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
static void update_mbintra_mode_probs(VP9_COMP* const cpi,
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_writer* const bc) {
|
2012-10-31 01:53:32 +01:00
|
|
|
VP9_COMMON *const cm = &cpi->common;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
{
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_prob Pnew [VP9_YMODES - 1];
|
|
|
|
unsigned int bct [VP9_YMODES - 1] [2];
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
update_mode(
|
2012-10-31 22:40:53 +01:00
|
|
|
bc, VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree,
|
2012-08-15 12:00:53 +02:00
|
|
|
Pnew, cm->fc.ymode_prob, bct, (unsigned int *)cpi->ymode_count
|
2012-07-14 00:21:29 +02:00
|
|
|
);
|
2012-11-16 00:50:07 +01:00
|
|
|
update_mode(bc, VP9_I32X32_MODES, vp9_sb_ymode_encodings,
|
|
|
|
vp9_sb_ymode_tree, Pnew, cm->fc.sb_ymode_prob, bct,
|
|
|
|
(unsigned int *)cpi->sb_ymode_count);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
// Recomputes every entry of the common mbskip_pred_probs[] table from the
// encoder's skip_false_count / skip_true_count pair for that context.
void vp9_update_skip_probs(VP9_COMP *cpi) {
  VP9_COMMON *const pc = &cpi->common;
  int ctx;

  for (ctx = 0; ctx < MBSKIP_CONTEXTS; ++ctx) {
    pc->mbskip_pred_probs[ctx] =
        get_binary_prob(cpi->skip_false_count[ctx],
                        cpi->skip_true_count[ctx]);
  }
}
|
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
static void update_switchable_interp_probs(VP9_COMP *cpi,
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_writer* const bc) {
|
2012-10-31 01:53:32 +01:00
|
|
|
VP9_COMMON *const pc = &cpi->common;
|
2012-07-18 22:43:01 +02:00
|
|
|
unsigned int branch_ct[32][2];
|
|
|
|
int i, j;
|
2012-10-31 22:40:53 +01:00
|
|
|
for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) {
|
2012-10-31 00:25:53 +01:00
|
|
|
vp9_tree_probs_from_distribution(
|
2012-10-31 22:40:53 +01:00
|
|
|
VP9_SWITCHABLE_FILTERS,
|
2012-10-31 01:12:12 +01:00
|
|
|
vp9_switchable_interp_encodings, vp9_switchable_interp_tree,
|
2012-10-22 23:43:01 +02:00
|
|
|
pc->fc.switchable_interp_prob[j], branch_ct,
|
Consistently use get_prob(), clip_prob() and newly added clip_pixel().
Add a function clip_pixel() to clip a pixel value to the [0,255] range
of allowed values, and use this where-ever appropriate (e.g. prediction,
reconstruction). Likewise, consistently use the recently added function
clip_prob(), which calculates a binary probability in the [1,255] range.
If possible, try to use get_prob() or its sister get_binary_prob() to
calculate binary probabilities, for consistency.
Since in some places, this means that binary probability calculations
are changed (we use {255,256}*count0/(total) in a range of places,
and all of these are now changed to use 256*count0+(total>>1)/total),
this changes the encoding result, so this patch warrants some extensive
testing.
Change-Id: Ibeeff8d886496839b8e0c0ace9ccc552351f7628
2012-12-10 21:09:07 +01:00
|
|
|
cpi->switchable_interp_count[j]);
|
2012-10-31 22:40:53 +01:00
|
|
|
for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) {
|
2012-07-18 22:43:01 +02:00
|
|
|
if (pc->fc.switchable_interp_prob[j][i] < 1)
|
|
|
|
pc->fc.switchable_interp_prob[j][i] = 1;
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(bc, pc->fc.switchable_interp_prob[j][i], 8);
|
2012-07-18 22:43:01 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-04-11 16:44:14 +02:00
|
|
|
// This function updates the reference frame prediction stats
|
2012-10-31 01:53:32 +01:00
|
|
|
// Decides, per prediction context, whether the reference-frame prediction
// probability should be replaced by one derived from the current counts.
// Nothing is done on key frames.
//
// For every context the probability implied by cpi->ref_pred_count is
// computed, and the cost of the counted events is evaluated under both the
// existing and the new probability (costs are in 1/256 bit units). When the
// saving is at least 2048 (8 bits) the new probability is stored in
// cm->ref_pred_probs and cpi->ref_pred_probs_update[i] is set to 1;
// otherwise the flag is set to 0.
static void update_refpred_stats(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  vp9_prob new_pred_probs[PREDICTION_PROBS];
  int i;

  if (cm->frame_type == KEY_FRAME) {
    return;
  }

  for (i = 0; i < PREDICTION_PROBS; i++) {
    int old_cost, new_cost, update;

    new_pred_probs[i] = get_binary_prob(cpi->ref_pred_count[i][0],
                                        cpi->ref_pred_count[i][1]);

    old_cost =
        (cpi->ref_pred_count[i][0] * vp9_cost_zero(cm->ref_pred_probs[i])) +
        (cpi->ref_pred_count[i][1] * vp9_cost_one(cm->ref_pred_probs[i]));

    new_cost =
        (cpi->ref_pred_count[i][0] * vp9_cost_zero(new_pred_probs[i])) +
        (cpi->ref_pred_count[i][1] * vp9_cost_one(new_pred_probs[i]));

    // The saving has to cover at least 8 bits (2048 in 1/256 bit units).
    update = (old_cost - new_cost) >= 2048;

    cpi->ref_pred_probs_update[i] = update;
    if (update) {
      cm->ref_pred_probs[i] = new_pred_probs[i];
    }
  }
}
|
|
|
|
|
2012-11-16 17:31:32 +01:00
|
|
|
// This function is called to update the mode probability context used to encode
|
|
|
|
// inter modes. It assumes the branch counts table has already been populated
|
|
|
|
// prior to the actual packing of the bitstream (in rd stage or dummy pack)
|
|
|
|
//
|
|
|
|
// The branch counts table is re-populated during the actual pack stage and in
|
|
|
|
// the decoder to facilitate backwards update of the context.
|
2013-02-06 14:02:53 +01:00
|
|
|
static void update_inter_mode_probs(VP9_COMMON *cm,
|
|
|
|
int mode_context[INTER_MODE_CONTEXTS][4]) {
|
2012-11-16 17:31:32 +01:00
|
|
|
int i, j;
|
2012-11-29 17:13:18 +01:00
|
|
|
unsigned int (*mv_ref_ct)[4][2];
|
2012-11-16 17:31:32 +01:00
|
|
|
|
|
|
|
vpx_memcpy(mode_context, cm->fc.vp9_mode_contexts,
|
|
|
|
sizeof(cm->fc.vp9_mode_contexts));
|
|
|
|
|
|
|
|
mv_ref_ct = cm->fc.mv_ref_ct;
|
|
|
|
|
|
|
|
for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
|
|
|
|
for (j = 0; j < 4; j++) {
|
Consistently use get_prob(), clip_prob() and newly added clip_pixel().
Add a function clip_pixel() to clip a pixel value to the [0,255] range
of allowed values, and use this where-ever appropriate (e.g. prediction,
reconstruction). Likewise, consistently use the recently added function
clip_prob(), which calculates a binary probability in the [1,255] range.
If possible, try to use get_prob() or its sister get_binary_prob() to
calculate binary probabilities, for consistency.
Since in some places, this means that binary probability calculations
are changed (we use {255,256}*count0/(total) in a range of places,
and all of these are now changed to use 256*count0+(total>>1)/total),
this changes the encoding result, so this patch warrants some extensive
testing.
Change-Id: Ibeeff8d886496839b8e0c0ace9ccc552351f7628
2012-12-10 21:09:07 +01:00
|
|
|
int new_prob, old_cost, new_cost;
|
2012-11-16 17:31:32 +01:00
|
|
|
|
|
|
|
// Work out cost of coding branches with the old and optimal probability
|
|
|
|
old_cost = cost_branch256(mv_ref_ct[i][j], mode_context[i][j]);
|
Consistently use get_prob(), clip_prob() and newly added clip_pixel().
Add a function clip_pixel() to clip a pixel value to the [0,255] range
of allowed values, and use this where-ever appropriate (e.g. prediction,
reconstruction). Likewise, consistently use the recently added function
clip_prob(), which calculates a binary probability in the [1,255] range.
If possible, try to use get_prob() or its sister get_binary_prob() to
calculate binary probabilities, for consistency.
Since in some places, this means that binary probability calculations
are changed (we use {255,256}*count0/(total) in a range of places,
and all of these are now changed to use 256*count0+(total>>1)/total),
this changes the encoding result, so this patch warrants some extensive
testing.
Change-Id: Ibeeff8d886496839b8e0c0ace9ccc552351f7628
2012-12-10 21:09:07 +01:00
|
|
|
new_prob = get_binary_prob(mv_ref_ct[i][j][0], mv_ref_ct[i][j][1]);
|
2012-11-16 17:31:32 +01:00
|
|
|
new_cost = cost_branch256(mv_ref_ct[i][j], new_prob);
|
|
|
|
|
|
|
|
// If cost saving is >= 14 bits then update the mode probability.
|
|
|
|
// This is the approximate net cost of updating one probability given
|
|
|
|
// that the no update case ismuch more common than the update case.
|
|
|
|
if (new_cost <= (old_cost - (14 << 8))) {
|
|
|
|
mode_context[i][j] = new_prob;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-12-10 13:38:48 +01:00
|
|
|
|
|
|
|
#if CONFIG_NEW_MVREF
|
|
|
|
static void update_mv_ref_probs(VP9_COMP *cpi,
|
|
|
|
int mvref_probs[MAX_REF_FRAMES]
|
|
|
|
[MAX_MV_REF_CANDIDATES-1]) {
|
|
|
|
MACROBLOCKD *xd = &cpi->mb.e_mbd;
|
|
|
|
int rf; // Reference frame
|
|
|
|
int ref_c; // Motion reference candidate
|
|
|
|
int node; // Probability node index
|
|
|
|
|
|
|
|
for (rf = 0; rf < MAX_REF_FRAMES; ++rf) {
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
// Skip the dummy entry for intra ref frame.
|
|
|
|
if (rf == INTRA_FRAME) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sum the counts for all candidates
|
|
|
|
for (ref_c = 0; ref_c < MAX_MV_REF_CANDIDATES; ++ref_c) {
|
|
|
|
count += cpi->mb_mv_ref_count[rf][ref_c];
|
|
|
|
}
|
|
|
|
|
|
|
|
// Calculate the tree node probabilities
|
|
|
|
for (node = 0; node < MAX_MV_REF_CANDIDATES-1; ++node) {
|
|
|
|
int new_prob, old_cost, new_cost;
|
|
|
|
unsigned int branch_cnts[2];
|
|
|
|
|
|
|
|
// How many hits on each branch at this node
|
|
|
|
branch_cnts[0] = cpi->mb_mv_ref_count[rf][node];
|
|
|
|
branch_cnts[1] = count - cpi->mb_mv_ref_count[rf][node];
|
|
|
|
|
|
|
|
// Work out cost of coding branches with the old and optimal probability
|
|
|
|
old_cost = cost_branch256(branch_cnts, xd->mb_mv_ref_probs[rf][node]);
|
|
|
|
new_prob = get_prob(branch_cnts[0], count);
|
|
|
|
new_cost = cost_branch256(branch_cnts, new_prob);
|
|
|
|
|
|
|
|
// Take current 0 branch cases out of residual count
|
|
|
|
count -= cpi->mb_mv_ref_count[rf][node];
|
|
|
|
|
|
|
|
if ((new_cost + VP9_MV_REF_UPDATE_COST) <= old_cost) {
|
|
|
|
mvref_probs[rf][node] = new_prob;
|
|
|
|
} else {
|
|
|
|
mvref_probs[rf][node] = xd->mb_mv_ref_probs[rf][node];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
// Writes Y mode m to bc using vp9_ymode_tree, the probabilities p and the
// m-th entry of vp9_ymode_encodings.
static void write_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
  write_token(bc, vp9_ymode_tree, p, &vp9_ymode_encodings[m]);
}
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
// Writes key-frame Y mode m to bc using vp9_kf_ymode_tree, the
// probabilities p and the m-th entry of vp9_kf_ymode_encodings.
static void kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
  write_token(bc, vp9_kf_ymode_tree, p, &vp9_kf_ymode_encodings[m]);
}
|
|
|
|
|
2012-11-16 00:50:07 +01:00
|
|
|
// Writes superblock Y mode m to bc using vp9_sb_ymode_tree, the
// probabilities p and the m-th entry of vp9_sb_ymode_encodings.
static void write_sb_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
  write_token(bc, vp9_sb_ymode_tree, p, &vp9_sb_ymode_encodings[m]);
}
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
// Writes key-frame superblock Y mode m to bc using the probabilities p and
// the m-th entry of vp9_sb_kf_ymode_encodings.
// NOTE(review): the tree passed here is vp9_uv_mode_tree, not a dedicated
// sb-kf-ymode tree - presumably the encodings were built from that tree;
// confirm against the entropy-mode tables.
static void sb_kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
  write_token(bc, vp9_uv_mode_tree, p, &vp9_sb_kf_ymode_encodings[m]);
}
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
// Writes 8x8 intra mode m to bc using vp9_i8x8_mode_tree, the
// probabilities p and the m-th entry of vp9_i8x8_mode_encodings.
static void write_i8x8_mode(vp9_writer *bc, int m, const vp9_prob *p) {
  write_token(bc, vp9_i8x8_mode_tree, p, &vp9_i8x8_mode_encodings[m]);
}
|
2011-12-07 22:03:57 +01:00
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
// Writes chroma mode m to bc using vp9_uv_mode_tree, the probabilities p
// and the m-th entry of vp9_uv_mode_encodings.
static void write_uv_mode(vp9_writer *bc, int m, const vp9_prob *p) {
  write_token(bc, vp9_uv_mode_tree, p, &vp9_uv_mode_encodings[m]);
}
|
|
|
|
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
// Writes block mode m to bc using vp9_bmode_tree, the probabilities p and
// vp9_bmode_encodings.
//
// With CONFIG_NEWBINTRAMODES, m must be either below
// B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS or equal to B_CONTEXT_PRED; in
// the latter case it is shifted down by CONTEXT_PRED_REPLACEMENTS before
// the encoding table is indexed.
static void write_bmode(vp9_writer *bc, int m, const vp9_prob *p) {
#if CONFIG_NEWBINTRAMODES
  assert(m < B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS || m == B_CONTEXT_PRED);
  if (m == B_CONTEXT_PRED) {
    m = B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS;
  }
#endif
  write_token(bc, vp9_bmode_tree, p, &vp9_bmode_encodings[m]);
}
|
|
|
|
|
2012-10-09 22:19:15 +02:00
|
|
|
// Writes key-frame block mode m to bc using vp9_kf_bmode_tree, the
// probabilities p and the m-th entry of vp9_kf_bmode_encodings.
static void write_kf_bmode(vp9_writer *bc, int m, const vp9_prob *p) {
  write_token(bc, vp9_kf_bmode_tree, p, &vp9_kf_bmode_encodings[m]);
}
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
// Writes macroblock split type x to bc using vp9_mbsplit_tree, the
// probabilities p and the x-th entry of vp9_mbsplit_encodings.
static void write_split(vp9_writer *bc, int x, const vp9_prob *p) {
  write_token(bc, vp9_mbsplit_tree, p, &vp9_mbsplit_encodings[x]);
}
|
|
|
|
|
2012-03-19 19:02:04 +01:00
|
|
|
static int prob_update_savings(const unsigned int *ct,
|
2012-10-31 22:40:53 +01:00
|
|
|
const vp9_prob oldp, const vp9_prob newp,
|
|
|
|
const vp9_prob upd) {
|
|
|
|
const int old_b = cost_branch256(ct, oldp);
|
|
|
|
const int new_b = cost_branch256(ct, newp);
|
|
|
|
const int update_b = 2048 + vp9_cost_upd256;
|
2012-07-14 00:21:29 +02:00
|
|
|
return (old_b - new_b - update_b);
|
2012-04-12 18:24:03 +02:00
|
|
|
}
|
|
|
|
|
2012-05-03 11:22:26 +02:00
|
|
|
static int prob_diff_update_savings(const unsigned int *ct,
|
2012-10-31 22:40:53 +01:00
|
|
|
const vp9_prob oldp, const vp9_prob newp,
|
|
|
|
const vp9_prob upd) {
|
|
|
|
const int old_b = cost_branch256(ct, oldp);
|
|
|
|
const int new_b = cost_branch256(ct, newp);
|
2012-07-14 00:21:29 +02:00
|
|
|
const int update_b = (newp == oldp ? 0 :
|
2012-10-31 22:40:53 +01:00
|
|
|
prob_diff_update_cost(newp, oldp) + vp9_cost_upd256);
|
2012-07-14 00:21:29 +02:00
|
|
|
return (old_b - new_b - update_b);
|
2012-05-03 11:22:26 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Searches for the probability that maximises the differential-update
// saving for the branch counts ct.
//
// Candidates run from *bestp towards oldp in steps of one (oldp itself is
// not evaluated). For each candidate the saving is
//   cost(ct, oldp) - cost(ct, candidate)
//     - (prob_diff_update_cost(candidate, oldp) + vp9_cost_upd256).
// On return *bestp holds the best candidate, or oldp when no candidate
// yields a positive saving, and the best saving (>= 0) is returned.
static int prob_diff_update_savings_search(const unsigned int *ct,
                                           const vp9_prob oldp, vp9_prob *bestp,
                                           const vp9_prob upd) {
  const int old_b = cost_branch256(ct, oldp);
  const int step = (*bestp > oldp) ? -1 : 1;
  int bestsavings = 0;
  vp9_prob bestnewp = oldp;
  vp9_prob newp = *bestp;

  while (newp != oldp) {
    const int new_b = cost_branch256(ct, newp);
    const int update_b = prob_diff_update_cost(newp, oldp) + vp9_cost_upd256;
    const int savings = old_b - new_b - update_b;

    if (savings > bestsavings) {
      bestsavings = savings;
      bestnewp = newp;
    }
    newp += step;
  }

  *bestp = bestnewp;
  return bestsavings;
}
|
|
|
|
|
2012-11-07 15:50:25 +01:00
|
|
|
// Conditionally updates *oldp from the branch counts ct.
//
// The probability implied by ct is computed and prob_update_savings() is
// consulted. If the saving is positive, a 1 is written with probability
// upd, followed by the new probability as an 8-bit literal, and *oldp is
// overwritten. Otherwise a 0 is written with probability upd and *oldp is
// left unchanged.
static void vp9_cond_prob_update(vp9_writer *bc, vp9_prob *oldp, vp9_prob upd,
                                 unsigned int *ct) {
  const vp9_prob newp = get_binary_prob(ct[0], ct[1]);
  const int savings = prob_update_savings(ct, *oldp, newp, upd);

  if (savings <= 0) {
    vp9_write(bc, 0, upd);
    return;
  }

  vp9_write(bc, 1, upd);
  vp9_write_literal(bc, newp, 8);
  *oldp = newp;
}
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
static void pack_mb_tokens(vp9_writer* const bc,
|
2012-10-16 22:52:39 +02:00
|
|
|
TOKENEXTRA **tp,
|
|
|
|
const TOKENEXTRA *const stop) {
|
|
|
|
TOKENEXTRA *p = *tp;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
while (p < stop) {
|
|
|
|
const int t = p->Token;
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_token *const a = vp9_coef_encodings + t;
|
|
|
|
const vp9_extra_bit_struct *const b = vp9_extra_bits + t;
|
2012-07-14 00:21:29 +02:00
|
|
|
int i = 0;
|
|
|
|
const unsigned char *pp = p->context_tree;
|
|
|
|
int v = a->value;
|
|
|
|
int n = a->Len;
|
|
|
|
|
2012-10-16 22:52:39 +02:00
|
|
|
if (t == EOSB_TOKEN)
|
|
|
|
{
|
|
|
|
++p;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/* skip one or two nodes */
|
|
|
|
if (p->skip_eob_node) {
|
|
|
|
n -= p->skip_eob_node;
|
|
|
|
i = 2 * p->skip_eob_node;
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
do {
|
|
|
|
const int bb = (v >> --n) & 1;
|
2012-11-16 19:48:23 +01:00
|
|
|
encode_bool(bc, bb, pp[i >> 1]);
|
2012-10-31 01:12:12 +01:00
|
|
|
i = vp9_coef_tree[i + bb];
|
2012-07-14 00:21:29 +02:00
|
|
|
} while (n);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
if (b->base_val) {
|
|
|
|
const int e = p->Extra, L = b->Len;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
if (L) {
|
|
|
|
const unsigned char *pp = b->prob;
|
|
|
|
int v = e >> 1;
|
|
|
|
int n = L; /* number of bits in v, assumed nonzero */
|
|
|
|
int i = 0;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
do {
|
|
|
|
const int bb = (v >> --n) & 1;
|
2012-11-16 19:48:23 +01:00
|
|
|
encode_bool(bc, bb, pp[i >> 1]);
|
2012-07-14 00:21:29 +02:00
|
|
|
i = b->tree[i + bb];
|
|
|
|
} while (n);
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-11-16 19:48:23 +01:00
|
|
|
encode_bool(bc, e & 1, 128);
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
++p;
|
|
|
|
}
|
|
|
|
|
2012-10-16 22:52:39 +02:00
|
|
|
*tp = p;
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Stores the low 24 bits of `size` at cx_data[0..2] in little-endian order
// (least significant byte first). cx_data must have room for three bytes.
//
// Each byte is written straight into the unsigned destination; no signed
// char temporary is involved, so byte values above 127 do not depend on an
// implementation-defined signed conversion.
static void write_partition_size(unsigned char *cx_data, int size) {
  cx_data[0] = (unsigned char)(size & 0xff);
  cx_data[1] = (unsigned char)((size >> 8) & 0xff);
  cx_data[2] = (unsigned char)((size >> 16) & 0xff);
}
|
|
|
|
|
|
|
|
static void write_mv_ref
|
|
|
|
(
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_writer *bc, MB_PREDICTION_MODE m, const vp9_prob *p
|
2012-07-14 00:21:29 +02:00
|
|
|
) {
|
2011-06-07 15:34:47 +02:00
|
|
|
#if CONFIG_DEBUG
|
2012-07-14 00:21:29 +02:00
|
|
|
assert(NEARESTMV <= m && m <= SPLITMV);
|
2011-06-07 15:34:47 +02:00
|
|
|
#endif
|
2012-10-31 22:40:53 +01:00
|
|
|
write_token(bc, vp9_mv_ref_tree, p,
|
|
|
|
vp9_mv_ref_encoding_array - NEARESTMV + m);
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
static void write_sb_mv_ref(vp9_writer *bc, MB_PREDICTION_MODE m,
|
|
|
|
const vp9_prob *p) {
|
2012-08-20 23:43:34 +02:00
|
|
|
#if CONFIG_DEBUG
|
|
|
|
assert(NEARESTMV <= m && m < SPLITMV);
|
|
|
|
#endif
|
2012-10-31 22:40:53 +01:00
|
|
|
write_token(bc, vp9_sb_mv_ref_tree, p,
|
|
|
|
vp9_sb_mv_ref_encoding_array - NEARESTMV + m);
|
2012-08-20 23:43:34 +02:00
|
|
|
}
|
|
|
|
|
2010-05-18 17:58:33 +02:00
|
|
|
static void write_sub_mv_ref
|
|
|
|
(
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_writer *bc, B_PREDICTION_MODE m, const vp9_prob *p
|
2012-07-14 00:21:29 +02:00
|
|
|
) {
|
2011-06-07 15:34:47 +02:00
|
|
|
#if CONFIG_DEBUG
|
2012-07-14 00:21:29 +02:00
|
|
|
assert(LEFT4X4 <= m && m <= NEW4X4);
|
2011-06-07 15:34:47 +02:00
|
|
|
#endif
|
2012-10-31 22:40:53 +01:00
|
|
|
write_token(bc, vp9_sub_mv_ref_tree, p,
|
|
|
|
vp9_sub_mv_ref_encoding_array - LEFT4X4 + m);
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
|
|
|
|
2013-01-15 15:43:35 +01:00
|
|
|
static void write_nmv(VP9_COMP *cpi, vp9_writer *bc,
|
|
|
|
const MV *mv, const int_mv *ref,
|
2012-10-18 01:47:38 +02:00
|
|
|
const nmv_context *nmvc, int usehp) {
|
2012-07-26 22:42:07 +02:00
|
|
|
MV e;
|
|
|
|
e.row = mv->row - ref->as_mv.row;
|
|
|
|
e.col = mv->col - ref->as_mv.col;
|
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
vp9_encode_nmv(bc, &e, &ref->as_mv, nmvc);
|
|
|
|
vp9_encode_nmv_fp(bc, &e, &ref->as_mv, nmvc, usehp);
|
2012-07-26 22:42:07 +02:00
|
|
|
}
|
|
|
|
|
2012-10-25 14:58:21 +02:00
|
|
|
#if CONFIG_NEW_MVREF
|
2012-10-31 22:40:53 +01:00
|
|
|
static void vp9_write_mv_ref_id(vp9_writer *w,
|
|
|
|
vp9_prob * ref_id_probs,
|
2012-10-25 14:58:21 +02:00
|
|
|
int mv_ref_id) {
|
|
|
|
// Encode the index for the MV reference.
|
|
|
|
switch (mv_ref_id) {
|
|
|
|
case 0:
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(w, 0, ref_id_probs[0]);
|
2012-10-25 14:58:21 +02:00
|
|
|
break;
|
|
|
|
case 1:
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(w, 1, ref_id_probs[0]);
|
|
|
|
vp9_write(w, 0, ref_id_probs[1]);
|
2012-10-25 14:58:21 +02:00
|
|
|
break;
|
|
|
|
case 2:
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(w, 1, ref_id_probs[0]);
|
|
|
|
vp9_write(w, 1, ref_id_probs[1]);
|
|
|
|
vp9_write(w, 0, ref_id_probs[2]);
|
2012-10-25 14:58:21 +02:00
|
|
|
break;
|
|
|
|
case 3:
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(w, 1, ref_id_probs[0]);
|
|
|
|
vp9_write(w, 1, ref_id_probs[1]);
|
|
|
|
vp9_write(w, 1, ref_id_probs[2]);
|
2012-10-25 14:58:21 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
// TRAP.. This should not happen
|
|
|
|
default:
|
|
|
|
assert(0);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2011-11-11 11:10:06 +01:00
|
|
|
// This function writes the current macroblock's segment id to the bitstream.
|
|
|
|
// It should only be called if a segment map update is indicated.
|
2012-10-31 22:40:53 +01:00
|
|
|
static void write_mb_segid(vp9_writer *bc,
|
2012-08-15 12:00:53 +02:00
|
|
|
const MB_MODE_INFO *mi, const MACROBLOCKD *xd) {
|
2012-07-14 00:21:29 +02:00
|
|
|
// Encode the MB segment id.
|
2012-10-30 01:58:18 +01:00
|
|
|
int seg_id = mi->segment_id;
|
2013-01-06 03:20:25 +01:00
|
|
|
|
2012-08-15 12:00:53 +02:00
|
|
|
if (xd->segmentation_enabled && xd->update_mb_segmentation_map) {
|
2012-10-30 01:58:18 +01:00
|
|
|
switch (seg_id) {
|
2012-07-14 00:21:29 +02:00
|
|
|
case 0:
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
|
|
|
|
vp9_write(bc, 0, xd->mb_segment_tree_probs[1]);
|
2012-07-14 00:21:29 +02:00
|
|
|
break;
|
|
|
|
case 1:
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
|
|
|
|
vp9_write(bc, 1, xd->mb_segment_tree_probs[1]);
|
2012-07-14 00:21:29 +02:00
|
|
|
break;
|
|
|
|
case 2:
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, 1, xd->mb_segment_tree_probs[0]);
|
|
|
|
vp9_write(bc, 0, xd->mb_segment_tree_probs[2]);
|
2012-07-14 00:21:29 +02:00
|
|
|
break;
|
|
|
|
case 3:
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, 1, xd->mb_segment_tree_probs[0]);
|
|
|
|
vp9_write(bc, 1, xd->mb_segment_tree_probs[2]);
|
2012-07-14 00:21:29 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
// TRAP.. This should not happen
|
|
|
|
default:
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
|
|
|
|
vp9_write(bc, 0, xd->mb_segment_tree_probs[1]);
|
2012-07-14 00:21:29 +02:00
|
|
|
break;
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
|
|
|
|
2013-01-30 18:30:46 +01:00
|
|
|
static void write_mb_segid_except(VP9_COMMON *cm,
|
|
|
|
vp9_writer *bc,
|
|
|
|
const MB_MODE_INFO *mi,
|
|
|
|
const MACROBLOCKD *xd,
|
|
|
|
int mb_row, int mb_col) {
|
|
|
|
// Encode the MB segment id.
|
|
|
|
int seg_id = mi->segment_id;
|
|
|
|
int pred_seg_id = vp9_get_pred_mb_segid(cm, xd,
|
|
|
|
mb_row * cm->mb_cols + mb_col);
|
|
|
|
const vp9_prob *p = xd->mb_segment_tree_probs;
|
|
|
|
const vp9_prob p1 = xd->mb_segment_mispred_tree_probs[pred_seg_id];
|
|
|
|
|
|
|
|
if (xd->segmentation_enabled && xd->update_mb_segmentation_map) {
|
|
|
|
vp9_write(bc, seg_id >= 2, p1);
|
|
|
|
if (pred_seg_id >= 2 && seg_id < 2) {
|
|
|
|
vp9_write(bc, seg_id == 1, p[1]);
|
|
|
|
} else if (pred_seg_id < 2 && seg_id >= 2) {
|
|
|
|
vp9_write(bc, seg_id == 3, p[2]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-11-02 14:30:10 +01:00
|
|
|
// This function encodes the reference frame
|
2012-10-31 22:40:53 +01:00
|
|
|
static void encode_ref_frame(vp9_writer *const bc,
|
2012-10-31 01:53:32 +01:00
|
|
|
VP9_COMMON *const cm,
|
2012-07-14 00:21:29 +02:00
|
|
|
MACROBLOCKD *xd,
|
|
|
|
int segment_id,
|
|
|
|
MV_REFERENCE_FRAME rf) {
|
|
|
|
int seg_ref_active;
|
|
|
|
int seg_ref_count = 0;
|
2012-10-30 06:15:27 +01:00
|
|
|
seg_ref_active = vp9_segfeature_active(xd,
|
|
|
|
segment_id,
|
|
|
|
SEG_LVL_REF_FRAME);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
if (seg_ref_active) {
|
2012-10-30 06:15:27 +01:00
|
|
|
seg_ref_count = vp9_check_segref(xd, segment_id, INTRA_FRAME) +
|
|
|
|
vp9_check_segref(xd, segment_id, LAST_FRAME) +
|
|
|
|
vp9_check_segref(xd, segment_id, GOLDEN_FRAME) +
|
|
|
|
vp9_check_segref(xd, segment_id, ALTREF_FRAME);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// If segment level coding of this signal is disabled...
|
|
|
|
// or the segment allows multiple reference frame options
|
|
|
|
if (!seg_ref_active || (seg_ref_count > 1)) {
|
|
|
|
// Values used in prediction model coding
|
|
|
|
unsigned char prediction_flag;
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_prob pred_prob;
|
2012-07-14 00:21:29 +02:00
|
|
|
MV_REFERENCE_FRAME pred_rf;
|
|
|
|
|
|
|
|
// Get the context probability the prediction flag
|
2012-10-29 14:44:18 +01:00
|
|
|
pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
// Get the predicted value.
|
2012-10-29 14:44:18 +01:00
|
|
|
pred_rf = vp9_get_pred_ref(cm, xd);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
// Did the chosen reference frame match its predicted value.
|
|
|
|
prediction_flag =
|
|
|
|
(xd->mode_info_context->mbmi.ref_frame == pred_rf);
|
|
|
|
|
2012-10-29 14:44:18 +01:00
|
|
|
vp9_set_pred_flag(xd, PRED_REF, prediction_flag);
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, prediction_flag, pred_prob);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
// If not predicted correctly then code value explicitly
|
|
|
|
if (!prediction_flag) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_prob mod_refprobs[PREDICTION_PROBS];
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
vpx_memcpy(mod_refprobs,
|
|
|
|
cm->mod_refprobs[pred_rf], sizeof(mod_refprobs));
|
|
|
|
|
|
|
|
// If segment coding enabled blank out options that cant occur by
|
|
|
|
// setting the branch probability to 0.
|
|
|
|
if (seg_ref_active) {
|
|
|
|
mod_refprobs[INTRA_FRAME] *=
|
2012-10-30 06:15:27 +01:00
|
|
|
vp9_check_segref(xd, segment_id, INTRA_FRAME);
|
2012-07-14 00:21:29 +02:00
|
|
|
mod_refprobs[LAST_FRAME] *=
|
2012-10-30 06:15:27 +01:00
|
|
|
vp9_check_segref(xd, segment_id, LAST_FRAME);
|
2012-07-14 00:21:29 +02:00
|
|
|
mod_refprobs[GOLDEN_FRAME] *=
|
2012-10-30 06:15:27 +01:00
|
|
|
(vp9_check_segref(xd, segment_id, GOLDEN_FRAME) *
|
|
|
|
vp9_check_segref(xd, segment_id, ALTREF_FRAME));
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (mod_refprobs[0]) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, (rf != INTRA_FRAME), mod_refprobs[0]);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Inter coded
|
|
|
|
if (rf != INTRA_FRAME) {
|
|
|
|
if (mod_refprobs[1]) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, (rf != LAST_FRAME), mod_refprobs[1]);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2012-01-28 13:20:14 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
if (rf != LAST_FRAME) {
|
|
|
|
if (mod_refprobs[2]) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, (rf != GOLDEN_FRAME), mod_refprobs[2]);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2012-01-28 13:20:14 +01:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2011-11-02 14:30:10 +01:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2012-01-31 13:45:30 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// if using the prediction mdoel we have nothing further to do because
|
|
|
|
// the reference frame is fully coded by the segment
|
2011-11-02 14:30:10 +01:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-02-03 14:46:18 +01:00
|
|
|
// Update the probabilities used to encode reference frame data
|
2012-10-31 01:53:32 +01:00
|
|
|
static void update_ref_probs(VP9_COMP *const cpi) {
|
|
|
|
VP9_COMMON *const cm = &cpi->common;
|
2012-02-03 14:46:18 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
const int *const rfct = cpi->count_mb_ref_frame_usage;
|
|
|
|
const int rf_intra = rfct[INTRA_FRAME];
|
|
|
|
const int rf_inter = rfct[LAST_FRAME] +
|
|
|
|
rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
|
2012-02-03 14:46:18 +01:00
|
|
|
|
2012-10-19 01:27:30 +02:00
|
|
|
cm->prob_intra_coded = get_binary_prob(rf_intra, rf_inter);
|
|
|
|
cm->prob_last_coded = get_prob(rfct[LAST_FRAME], rf_inter);
|
|
|
|
cm->prob_gf_coded = get_binary_prob(rfct[GOLDEN_FRAME], rfct[ALTREF_FRAME]);
|
2012-02-03 14:46:18 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Compute a modified set of probabilities to use when prediction of the
|
|
|
|
// reference frame fails
|
2012-10-29 14:44:18 +01:00
|
|
|
vp9_compute_mod_refprobs(cm);
|
2012-02-03 14:46:18 +01:00
|
|
|
}
|
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
|
|
|
|
vp9_writer *bc,
|
|
|
|
int mb_rows_left, int mb_cols_left) {
|
2012-10-31 01:53:32 +01:00
|
|
|
VP9_COMMON *const pc = &cpi->common;
|
2012-07-26 22:42:07 +02:00
|
|
|
const nmv_context *nmvc = &pc->fc.nmvc;
|
2013-01-06 03:20:25 +01:00
|
|
|
MACROBLOCK *const x = &cpi->mb;
|
|
|
|
MACROBLOCKD *const xd = &x->e_mbd;
|
2012-07-14 00:21:29 +02:00
|
|
|
const int mis = pc->mode_info_stride;
|
2013-01-06 03:20:25 +01:00
|
|
|
MB_MODE_INFO *const mi = &m->mbmi;
|
|
|
|
const MV_REFERENCE_FRAME rf = mi->ref_frame;
|
|
|
|
const MB_PREDICTION_MODE mode = mi->mode;
|
|
|
|
const int segment_id = mi->segment_id;
|
|
|
|
const int mb_size = 1 << mi->sb_type;
|
|
|
|
int skip_coeff;
|
2012-04-07 01:38:34 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
int mb_row = pc->mb_rows - mb_rows_left;
|
|
|
|
int mb_col = pc->mb_cols - mb_cols_left;
|
|
|
|
xd->prev_mode_info_context = pc->prev_mi + (m - pc->mi);
|
|
|
|
x->partition_info = x->pi + (m - pc->mi);
|
2012-05-31 18:51:54 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
// Distance of Mb to the various image edges.
|
|
|
|
// These specified to 8th pel as they are always compared to MV
|
|
|
|
// values that are in 1/8th pel units
|
|
|
|
xd->mb_to_left_edge = -((mb_col * 16) << 3);
|
|
|
|
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
|
|
|
|
xd->mb_to_right_edge = ((pc->mb_cols - mb_size - mb_col) * 16) << 3;
|
|
|
|
xd->mb_to_bottom_edge = ((pc->mb_rows - mb_size - mb_row) * 16) << 3;
|
2012-11-13 00:43:11 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
active_section = 9;
|
2012-11-13 00:43:11 +01:00
|
|
|
#endif
|
2012-01-28 11:24:43 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
if (cpi->mb.e_mbd.update_mb_segmentation_map) {
|
|
|
|
// Is temporal coding of the segment map enabled
|
|
|
|
if (pc->temporal_update) {
|
|
|
|
unsigned char prediction_flag = vp9_get_pred_flag(xd, PRED_SEG_ID);
|
|
|
|
vp9_prob pred_prob = vp9_get_pred_prob(pc, xd, PRED_SEG_ID);
|
2011-11-02 14:30:10 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
// Code the segment id prediction flag for this mb
|
|
|
|
vp9_write(bc, prediction_flag, pred_prob);
|
2012-01-28 11:24:43 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
// If the mb segment id wasn't predicted code explicitly
|
|
|
|
if (!prediction_flag)
|
2013-01-30 18:30:46 +01:00
|
|
|
write_mb_segid_except(pc, bc, mi, &cpi->mb.e_mbd, mb_row, mb_col);
|
2013-01-06 03:20:25 +01:00
|
|
|
} else {
|
|
|
|
// Normal unpredicted coding
|
|
|
|
write_mb_segid(bc, mi, &cpi->mb.e_mbd);
|
|
|
|
}
|
|
|
|
}
|
2011-11-15 16:22:26 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
if (!pc->mb_no_coeff_skip) {
|
|
|
|
skip_coeff = 0;
|
2013-01-28 16:22:53 +01:00
|
|
|
} else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
|
2013-01-06 03:20:25 +01:00
|
|
|
skip_coeff = 1;
|
|
|
|
} else {
|
|
|
|
const int nmbs = mb_size;
|
|
|
|
const int xmbs = MIN(nmbs, mb_cols_left);
|
|
|
|
const int ymbs = MIN(nmbs, mb_rows_left);
|
|
|
|
int x, y;
|
|
|
|
|
|
|
|
skip_coeff = 1;
|
|
|
|
for (y = 0; y < ymbs; y++) {
|
|
|
|
for (x = 0; x < xmbs; x++) {
|
|
|
|
skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff;
|
|
|
|
}
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
vp9_write(bc, skip_coeff,
|
|
|
|
vp9_get_pred_prob(pc, xd, PRED_MBSKIP));
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
// Encode the reference frame.
|
2013-01-28 16:22:53 +01:00
|
|
|
encode_ref_frame(bc, pc, xd, segment_id, rf);
|
2011-11-02 14:30:10 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
if (rf == INTRA_FRAME) {
|
2012-02-27 19:22:38 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
2013-01-06 03:20:25 +01:00
|
|
|
active_section = 6;
|
2012-02-27 19:22:38 +01:00
|
|
|
#endif
|
2011-11-02 14:30:10 +01:00
|
|
|
|
2013-01-28 16:22:53 +01:00
|
|
|
if (m->mbmi.sb_type)
|
|
|
|
write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
|
|
|
|
else
|
|
|
|
write_ymode(bc, mode, pc->fc.ymode_prob);
|
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
if (mode == B_PRED) {
|
|
|
|
int j = 0;
|
|
|
|
do {
|
|
|
|
write_bmode(bc, m->bmi[j].as_mode.first,
|
|
|
|
pc->fc.bmode_prob);
|
|
|
|
} while (++j < 16);
|
|
|
|
}
|
|
|
|
if (mode == I8X8_PRED) {
|
|
|
|
write_i8x8_mode(bc, m->bmi[0].as_mode.first,
|
|
|
|
pc->fc.i8x8_mode_prob);
|
|
|
|
write_i8x8_mode(bc, m->bmi[2].as_mode.first,
|
|
|
|
pc->fc.i8x8_mode_prob);
|
|
|
|
write_i8x8_mode(bc, m->bmi[8].as_mode.first,
|
|
|
|
pc->fc.i8x8_mode_prob);
|
|
|
|
write_i8x8_mode(bc, m->bmi[10].as_mode.first,
|
|
|
|
pc->fc.i8x8_mode_prob);
|
|
|
|
} else {
|
|
|
|
write_uv_mode(bc, mi->uv_mode,
|
|
|
|
pc->fc.uv_mode_prob[mode]);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
vp9_prob mv_ref_p[VP9_MVREFS - 1];
|
2012-10-25 14:58:21 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]);
|
2011-12-06 21:03:42 +01:00
|
|
|
|
2010-05-18 17:58:33 +02:00
|
|
|
#ifdef ENTROPY_STATS
|
2013-01-06 03:20:25 +01:00
|
|
|
accum_mv_refs(mode, ct);
|
|
|
|
active_section = 3;
|
2010-05-18 17:58:33 +02:00
|
|
|
#endif
|
|
|
|
|
2013-02-06 14:02:53 +01:00
|
|
|
// If segment skip is not enabled code the mode.
|
2013-01-28 16:22:53 +01:00
|
|
|
if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
|
2013-01-06 03:20:25 +01:00
|
|
|
if (mi->sb_type) {
|
|
|
|
write_sb_mv_ref(bc, mode, mv_ref_p);
|
2013-01-08 19:29:22 +01:00
|
|
|
} else {
|
2013-01-06 03:20:25 +01:00
|
|
|
write_mv_ref(bc, mode, mv_ref_p);
|
|
|
|
}
|
|
|
|
vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]);
|
|
|
|
}
|
2011-11-03 13:50:09 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
if (mode >= NEARESTMV && mode <= SPLITMV) {
|
|
|
|
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
|
|
|
|
write_token(bc, vp9_switchable_interp_tree,
|
|
|
|
vp9_get_pred_probs(&cpi->common, xd,
|
|
|
|
PRED_SWITCHABLE_INTERP),
|
|
|
|
vp9_switchable_interp_encodings +
|
|
|
|
vp9_switchable_interp_map[mi->interp_filter]);
|
|
|
|
} else {
|
|
|
|
assert(mi->interp_filter == cpi->common.mcomp_filter_type);
|
|
|
|
}
|
|
|
|
}
|
2012-11-08 20:03:00 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
// does the feature use compound prediction or not
|
|
|
|
// (if not specified at the frame/segment level)
|
|
|
|
if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
|
|
|
|
vp9_write(bc, mi->second_ref_frame > INTRA_FRAME,
|
|
|
|
vp9_get_pred_prob(pc, xd, PRED_COMP));
|
|
|
|
}
|
2012-11-07 15:50:25 +01:00
|
|
|
#if CONFIG_COMP_INTERINTRA_PRED
|
2013-01-06 03:20:25 +01:00
|
|
|
if (cpi->common.use_interintra &&
|
|
|
|
mode >= NEARESTMV && mode < SPLITMV &&
|
|
|
|
mi->second_ref_frame <= INTRA_FRAME) {
|
|
|
|
vp9_write(bc, mi->second_ref_frame == INTRA_FRAME,
|
|
|
|
pc->fc.interintra_prob);
|
|
|
|
// if (!cpi->dummy_packing)
|
|
|
|
// printf("-- %d (%d)\n", mi->second_ref_frame == INTRA_FRAME,
|
|
|
|
// pc->fc.interintra_prob);
|
|
|
|
if (mi->second_ref_frame == INTRA_FRAME) {
|
|
|
|
// if (!cpi->dummy_packing)
|
|
|
|
// printf("** %d %d\n", mi->interintra_mode,
|
|
|
|
// mi->interintra_uv_mode);
|
|
|
|
write_ymode(bc, mi->interintra_mode, pc->fc.ymode_prob);
|
2012-11-07 15:50:25 +01:00
|
|
|
#if SEPARATE_INTERINTRA_UV
|
2013-01-06 03:20:25 +01:00
|
|
|
write_uv_mode(bc, mi->interintra_uv_mode,
|
|
|
|
pc->fc.uv_mode_prob[mi->interintra_mode]);
|
2012-11-07 15:50:25 +01:00
|
|
|
#endif
|
2013-01-06 03:20:25 +01:00
|
|
|
}
|
|
|
|
}
|
2012-11-07 15:50:25 +01:00
|
|
|
#endif
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2012-12-05 17:23:38 +01:00
|
|
|
#if CONFIG_NEW_MVREF
|
2013-01-06 03:20:25 +01:00
|
|
|
// if ((mode == NEWMV) || (mode == SPLITMV)) {
|
|
|
|
if (mode == NEWMV) {
|
|
|
|
// Encode the index of the choice.
|
|
|
|
vp9_write_mv_ref_id(bc,
|
|
|
|
xd->mb_mv_ref_probs[rf], mi->best_index);
|
|
|
|
|
|
|
|
if (mi->second_ref_frame > 0) {
|
|
|
|
// Encode the index of the choice.
|
|
|
|
vp9_write_mv_ref_id(
|
|
|
|
bc, xd->mb_mv_ref_probs[mi->second_ref_frame],
|
|
|
|
mi->best_second_index);
|
|
|
|
}
|
|
|
|
}
|
2012-12-05 17:23:38 +01:00
|
|
|
#endif
|
2013-01-06 03:20:25 +01:00
|
|
|
|
|
|
|
switch (mode) { /* new, split require MVs */
|
|
|
|
case NEWMV:
|
2012-02-27 19:22:38 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
2013-01-06 03:20:25 +01:00
|
|
|
active_section = 5;
|
2012-02-27 19:22:38 +01:00
|
|
|
#endif
|
2013-01-15 15:43:35 +01:00
|
|
|
write_nmv(cpi, bc, &mi->mv[0].as_mv, &mi->best_mv,
|
2013-01-06 03:20:25 +01:00
|
|
|
(const nmv_context*) nmvc,
|
|
|
|
xd->allow_high_precision_mv);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
if (mi->second_ref_frame > 0) {
|
2013-01-15 15:43:35 +01:00
|
|
|
write_nmv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv,
|
2013-01-06 03:20:25 +01:00
|
|
|
(const nmv_context*) nmvc,
|
|
|
|
xd->allow_high_precision_mv);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case SPLITMV: {
|
|
|
|
int j = 0;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-02-27 19:22:38 +01:00
|
|
|
#ifdef MODE_STATS
|
2013-01-06 03:20:25 +01:00
|
|
|
++count_mb_seg[mi->partitioning];
|
2012-02-27 19:22:38 +01:00
|
|
|
#endif
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob);
|
|
|
|
cpi->mbsplit_count[mi->partitioning]++;
|
2012-04-07 01:38:34 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
do {
|
|
|
|
B_PREDICTION_MODE blockmode;
|
|
|
|
int_mv blockmv;
|
|
|
|
const int *const L = vp9_mbsplits[mi->partitioning];
|
|
|
|
int k = -1; /* first block in subset j */
|
|
|
|
int mv_contz;
|
|
|
|
int_mv leftmv, abovemv;
|
|
|
|
|
|
|
|
blockmode = cpi->mb.partition_info->bmi[j].mode;
|
|
|
|
blockmv = cpi->mb.partition_info->bmi[j].mv;
|
2012-02-27 19:22:38 +01:00
|
|
|
#if CONFIG_DEBUG
|
2013-01-06 03:20:25 +01:00
|
|
|
while (j != L[++k])
|
|
|
|
if (k >= 16)
|
|
|
|
assert(0);
|
2012-02-27 19:22:38 +01:00
|
|
|
#else
|
2013-01-06 03:20:25 +01:00
|
|
|
while (j != L[++k]);
|
2012-02-27 19:22:38 +01:00
|
|
|
#endif
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independendly encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Resuls: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
leftmv.as_int = left_block_mv(xd, m, k);
|
2013-01-06 03:20:25 +01:00
|
|
|
abovemv.as_int = above_block_mv(m, k, mis);
|
|
|
|
mv_contz = vp9_mv_cont(&leftmv, &abovemv);
|
2011-09-30 17:45:16 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
write_sub_mv_ref(bc, blockmode,
|
|
|
|
cpi->common.fc.sub_mv_ref_prob[mv_contz]);
|
|
|
|
cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++;
|
|
|
|
if (blockmode == NEW4X4) {
|
2012-02-27 19:22:38 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
2013-01-06 03:20:25 +01:00
|
|
|
active_section = 11;
|
|
|
|
#endif
|
2013-01-15 15:43:35 +01:00
|
|
|
write_nmv(cpi, bc, &blockmv.as_mv, &mi->best_mv,
|
2013-01-06 03:20:25 +01:00
|
|
|
(const nmv_context*) nmvc,
|
|
|
|
xd->allow_high_precision_mv);
|
|
|
|
|
|
|
|
if (mi->second_ref_frame > 0) {
|
2013-01-15 15:43:35 +01:00
|
|
|
write_nmv(cpi, bc,
|
2013-01-06 03:20:25 +01:00
|
|
|
&cpi->mb.partition_info->bmi[j].second_mv.as_mv,
|
|
|
|
&mi->best_second_mv,
|
|
|
|
(const nmv_context*) nmvc,
|
|
|
|
xd->allow_high_precision_mv);
|
2012-04-07 01:38:34 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2013-01-06 03:20:25 +01:00
|
|
|
} while (++j < cpi->mb.partition_info->count);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
if (((rf == INTRA_FRAME && mode <= I8X8_PRED) ||
|
|
|
|
(rf != INTRA_FRAME && !(mode == SPLITMV &&
|
|
|
|
mi->partitioning == PARTITIONING_4X4))) &&
|
|
|
|
pc->txfm_mode == TX_MODE_SELECT &&
|
|
|
|
!((pc->mb_no_coeff_skip && skip_coeff) ||
|
2013-01-28 16:22:53 +01:00
|
|
|
(vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
|
2013-01-06 03:20:25 +01:00
|
|
|
TX_SIZE sz = mi->txfm_size;
|
|
|
|
// FIXME(rbultje) code ternary symbol once all experiments are merged
|
|
|
|
vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
|
|
|
|
if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) {
|
|
|
|
vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]);
|
|
|
|
if (mi->sb_type && sz != TX_8X8)
|
|
|
|
vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]);
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-02-02 18:04:40 +01:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
static void write_mb_modes_kf(const VP9_COMP *cpi,
|
|
|
|
const MODE_INFO *m,
|
|
|
|
vp9_writer *bc,
|
|
|
|
int mb_rows_left, int mb_cols_left) {
|
|
|
|
const VP9_COMMON *const c = &cpi->common;
|
|
|
|
const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
|
|
|
|
const int mis = c->mode_info_stride;
|
|
|
|
const int ym = m->mbmi.mode;
|
|
|
|
const int segment_id = m->mbmi.segment_id;
|
|
|
|
int skip_coeff;
|
2012-10-16 22:52:39 +02:00
|
|
|
|
|
|
|
if (xd->update_mb_segmentation_map) {
|
|
|
|
write_mb_segid(bc, &m->mbmi, xd);
|
|
|
|
}
|
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
if (!c->mb_no_coeff_skip) {
|
|
|
|
skip_coeff = 0;
|
2013-01-28 16:22:53 +01:00
|
|
|
} else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
|
2013-01-06 03:20:25 +01:00
|
|
|
skip_coeff = 1;
|
|
|
|
} else {
|
|
|
|
const int nmbs = 1 << m->mbmi.sb_type;
|
|
|
|
const int xmbs = MIN(nmbs, mb_cols_left);
|
|
|
|
const int ymbs = MIN(nmbs, mb_rows_left);
|
|
|
|
int x, y;
|
|
|
|
|
|
|
|
skip_coeff = 1;
|
|
|
|
for (y = 0; y < ymbs; y++) {
|
|
|
|
for (x = 0; x < xmbs; x++) {
|
|
|
|
skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
vp9_write(bc, skip_coeff,
|
|
|
|
vp9_get_pred_prob(c, xd, PRED_MBSKIP));
|
2012-10-16 22:52:39 +02:00
|
|
|
}
|
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
if (m->mbmi.sb_type) {
|
2012-10-16 22:52:39 +02:00
|
|
|
sb_kfwrite_ymode(bc, ym,
|
|
|
|
c->sb_kf_ymode_prob[c->kf_ymode_probs_index]);
|
2013-01-08 19:29:22 +01:00
|
|
|
} else {
|
2012-10-16 22:52:39 +02:00
|
|
|
kfwrite_ymode(bc, ym,
|
|
|
|
c->kf_ymode_prob[c->kf_ymode_probs_index]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ym == B_PRED) {
|
|
|
|
int i = 0;
|
|
|
|
do {
|
|
|
|
const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independendly encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Resuls: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
const B_PREDICTION_MODE L = (xd->left_available || (i & 3)) ?
|
|
|
|
left_block_mode(m, i) : B_DC_PRED;
|
2012-10-16 22:52:39 +02:00
|
|
|
const int bm = m->bmi[i].as_mode.first;
|
|
|
|
|
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
++intra_mode_stats [A] [L] [bm];
|
|
|
|
#endif
|
|
|
|
|
2012-10-09 22:19:15 +02:00
|
|
|
write_kf_bmode(bc, bm, c->kf_bmode_prob[A][L]);
|
2012-10-16 22:52:39 +02:00
|
|
|
} while (++i < 16);
|
|
|
|
}
|
|
|
|
if (ym == I8X8_PRED) {
|
|
|
|
write_i8x8_mode(bc, m->bmi[0].as_mode.first,
|
|
|
|
c->fc.i8x8_mode_prob);
|
|
|
|
// printf(" mode: %d\n", m->bmi[0].as_mode.first); fflush(stdout);
|
|
|
|
write_i8x8_mode(bc, m->bmi[2].as_mode.first,
|
|
|
|
c->fc.i8x8_mode_prob);
|
|
|
|
// printf(" mode: %d\n", m->bmi[2].as_mode.first); fflush(stdout);
|
|
|
|
write_i8x8_mode(bc, m->bmi[8].as_mode.first,
|
|
|
|
c->fc.i8x8_mode_prob);
|
|
|
|
// printf(" mode: %d\n", m->bmi[8].as_mode.first); fflush(stdout);
|
|
|
|
write_i8x8_mode(bc, m->bmi[10].as_mode.first,
|
|
|
|
c->fc.i8x8_mode_prob);
|
|
|
|
// printf(" mode: %d\n", m->bmi[10].as_mode.first); fflush(stdout);
|
|
|
|
} else
|
|
|
|
write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
|
|
|
|
|
2012-11-08 20:03:00 +01:00
|
|
|
if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT &&
|
2013-01-06 03:20:25 +01:00
|
|
|
!((c->mb_no_coeff_skip && skip_coeff) ||
|
2013-01-28 16:22:53 +01:00
|
|
|
(vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
|
2012-10-16 22:52:39 +02:00
|
|
|
TX_SIZE sz = m->mbmi.txfm_size;
|
|
|
|
// FIXME(rbultje) code ternary symbol once all experiments are merged
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, sz != TX_4X4, c->prob_tx[0]);
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
if (sz != TX_4X4 && ym <= TM_PRED) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, sz != TX_8X8, c->prob_tx[1]);
|
2013-01-06 03:20:25 +01:00
|
|
|
if (m->mbmi.sb_type && sz != TX_8X8)
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
vp9_write(bc, sz != TX_16X16, c->prob_tx[2]);
|
|
|
|
}
|
2012-10-16 22:52:39 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
// Writes one block: its mode information followed by its tokens.
//
// Points xd->mode_info_context at `m`, derives the left / right / up
// availability flags from the block position and the current tile's column
// range, writes the modes with the key-frame or inter-frame writer, and
// finally packs the block's tokens from *tok (bounded by tok_end).
static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
                          TOKENEXTRA **tok, TOKENEXTRA *tok_end,
                          int mb_row, int mb_col) {
  VP9_COMMON *const c = &cpi->common;
  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
  const int block_mbs = 1 << m->mbmi.sb_type;
  const int rows_left = c->mb_rows - mb_row;
  const int cols_left = c->mb_cols - mb_col;

  xd->mode_info_context = m;

  // Left / right neighbours are only available inside the current tile.
  xd->left_available = mb_col > c->cur_tile_mb_col_start;
  xd->right_available = (mb_col + block_mbs) < c->cur_tile_mb_col_end;
  xd->up_available = mb_row > 0;

  if (c->frame_type != KEY_FRAME) {
    pack_inter_mode_mvs(cpi, m, bc, rows_left, cols_left);
#ifdef ENTROPY_STATS
    active_section = 1;
#endif
  } else {
    write_mb_modes_kf(cpi, m, bc, rows_left, cols_left);
#ifdef ENTROPY_STATS
    active_section = 8;
#endif
  }

  assert(*tok < tok_end);
  pack_mb_tokens(bc, tok, tok_end);
}
|
|
|
|
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
/* Writes the block-size flags and per-block mode/token data for every
 * macroblock of the current tile.
 *
 * The tile is walked in steps of 4x4 macroblocks.  For each such group a
 * flag says whether it is coded as one 64x64 superblock; if not, each of its
 * four 2x2-macroblock quadrants gets a flag saying whether it is coded as a
 * 32x32 superblock, and otherwise its macroblocks are written one by one.
 * Quadrants and macroblocks lying outside the frame are skipped.
 *
 * cpi      encoder instance (the tile bounds are read from cpi->common)
 * bc       writer receiving the bits
 * tok      in/out cursor into the token list, forwarded to write_modes_b()
 * tok_end  end of the token list
 */
static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
                        TOKENEXTRA **tok, TOKENEXTRA *tok_end) {
  VP9_COMMON *const c = &cpi->common;
  const int mis = c->mode_info_stride;
  /* Mode info of the first macroblock of the current 4-row strip. */
  MODE_INFO *row_mi =
      c->mi + (c->cur_tile_mb_col_start + c->cur_tile_mb_row_start * mis);
  int mb_row, mb_col;

  for (mb_row = c->cur_tile_mb_row_start;
       mb_row < c->cur_tile_mb_row_end; mb_row += 4, row_mi += 4 * mis) {
    MODE_INFO *sb64_mi = row_mi;

    for (mb_col = c->cur_tile_mb_col_start;
         mb_col < c->cur_tile_mb_col_end; mb_col += 4, sb64_mi += 4) {
      int quad;

      vp9_write(bc, sb64_mi->mbmi.sb_type == BLOCK_SIZE_SB64X64,
                c->sb64_coded);
      if (sb64_mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
        /* The whole 4x4 group is a single block. */
        write_modes_b(cpi, sb64_mi, bc, tok, tok_end, mb_row, mb_col);
        continue;
      }

      for (quad = 0; quad < 4; quad++) {
        /* Quadrant origin inside the group: (0,0), (2,0), (0,2), (2,2). */
        const int x_idx_sb = (quad & 1) << 1;
        const int y_idx_sb = quad & 2;
        MODE_INFO *sb32_mi = sb64_mi + y_idx_sb * mis + x_idx_sb;
        int sub;

        if (mb_col + x_idx_sb >= c->mb_cols ||
            mb_row + y_idx_sb >= c->mb_rows) {
          /* Quadrant starts outside the frame, nothing to write. */
          continue;
        }

        vp9_write(bc, sb32_mi->mbmi.sb_type, c->sb32_coded);
        if (sb32_mi->mbmi.sb_type) {
          assert(sb32_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32);
          write_modes_b(cpi, sb32_mi, bc, tok, tok_end,
                        mb_row + y_idx_sb, mb_col + x_idx_sb);
          continue;
        }

        /* Individual macroblocks, visited in the order
         * top-left, top-right, bottom-left, bottom-right. */
        for (sub = 0; sub < 4; sub++) {
          const int x_idx = x_idx_sb + (sub & 1);
          const int y_idx = y_idx_sb + (sub >> 1);
          MODE_INFO *mb_mi = sb64_mi + x_idx + y_idx * mis;

          if (mb_row + y_idx >= c->mb_rows ||
              mb_col + x_idx >= c->mb_cols) {
            /* Macroblock lies outside the frame, move on. */
            continue;
          }

          assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16);
          write_modes_b(cpi, mb_mi, bc, tok, tok_end,
                        mb_row + y_idx, mb_col + x_idx);
        }
      }
    }
  }
}
|
2012-02-02 18:04:40 +01:00
|
|
|
|
2011-06-10 11:11:15 +02:00
|
|
|
|
|
|
|
/* This function is used for debugging probability trees. */
|
2012-12-08 01:09:59 +01:00
|
|
|
static void print_prob_tree(vp9_coeff_probs *coef_probs) {
|
2012-07-14 00:21:29 +02:00
|
|
|
/* print coef probability tree */
|
|
|
|
int i, j, k, l;
|
|
|
|
FILE *f = fopen("enc_tree_probs.txt", "a");
|
|
|
|
fprintf(f, "{\n");
|
2012-12-08 01:09:59 +01:00
|
|
|
for (i = 0; i < BLOCK_TYPES_4X4; i++) {
|
2012-07-14 00:21:29 +02:00
|
|
|
fprintf(f, " {\n");
|
|
|
|
for (j = 0; j < COEF_BANDS; j++) {
|
|
|
|
fprintf(f, " {\n");
|
|
|
|
for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
|
|
|
|
fprintf(f, " {");
|
|
|
|
for (l = 0; l < ENTROPY_NODES; l++) {
|
|
|
|
fprintf(f, "%3u, ",
|
|
|
|
(unsigned int)(coef_probs [i][j][k][l]));
|
2011-06-10 11:11:15 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
fprintf(f, " }\n");
|
|
|
|
}
|
|
|
|
fprintf(f, " }\n");
|
2011-06-10 11:11:15 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
fprintf(f, " }\n");
|
|
|
|
}
|
|
|
|
fprintf(f, "}\n");
|
|
|
|
fclose(f);
|
2011-06-10 11:11:15 +02:00
|
|
|
}
|
|
|
|
|
2012-12-08 01:09:59 +01:00
|
|
|
static void build_tree_distribution(vp9_coeff_probs *coef_probs,
|
|
|
|
vp9_coeff_count *coef_counts,
|
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
VP9_COMP *cpi,
|
|
|
|
vp9_coeff_accum *context_counters,
|
|
|
|
#endif
|
|
|
|
vp9_coeff_stats *coef_branch_ct,
|
|
|
|
int block_types) {
|
2012-08-03 02:03:14 +02:00
|
|
|
int i = 0, j, k;
|
2012-06-15 03:14:43 +02:00
|
|
|
#ifdef ENTROPY_STATS
|
2012-08-03 02:03:14 +02:00
|
|
|
int t = 0;
|
2012-06-15 03:14:43 +02:00
|
|
|
#endif
|
2012-12-08 01:09:59 +01:00
|
|
|
|
|
|
|
for (i = 0; i < block_types; ++i) {
|
2012-08-03 02:03:14 +02:00
|
|
|
for (j = 0; j < COEF_BANDS; ++j) {
|
|
|
|
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
|
2013-02-15 21:09:05 +01:00
|
|
|
if (k >= 3 && j == 0)
|
2012-07-14 00:21:29 +02:00
|
|
|
continue;
|
2012-12-08 01:09:59 +01:00
|
|
|
vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS,
|
|
|
|
vp9_coef_encodings, vp9_coef_tree,
|
|
|
|
coef_probs[i][j][k],
|
|
|
|
coef_branch_ct[i][j][k],
|
Consistently use get_prob(), clip_prob() and newly added clip_pixel().
Add a function clip_pixel() to clip a pixel value to the [0,255] range
of allowed values, and use this where-ever appropriate (e.g. prediction,
reconstruction). Likewise, consistently use the recently added function
clip_prob(), which calculates a binary probability in the [1,255] range.
If possible, try to use get_prob() or its sister get_binary_prob() to
calculate binary probabilities, for consistency.
Since in some places, this means that binary probability calculations
are changed (we use {255,256}*count0/(total) in a range of places,
and all of these are now changed to use 256*count0+(total>>1)/total),
this changes the encoding result, so this patch warrants some extensive
testing.
Change-Id: Ibeeff8d886496839b8e0c0ace9ccc552351f7628
2012-12-10 21:09:07 +01:00
|
|
|
coef_counts[i][j][k]);
|
2012-04-12 18:24:03 +02:00
|
|
|
#ifdef ENTROPY_STATS
|
2012-08-03 02:03:14 +02:00
|
|
|
if (!cpi->dummy_packing)
|
|
|
|
for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
|
2012-12-08 01:09:59 +01:00
|
|
|
context_counters[i][j][k][t] += coef_counts[i][j][k][t];
|
2012-07-14 00:21:29 +02:00
|
|
|
#endif
|
2012-08-03 02:03:14 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-12-08 01:09:59 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Expands to one build_tree_distribution() call.  The statistics
 * accumulator (and cpi) are only forwarded in ENTROPY_STATS builds; in
 * other builds the accum argument is dropped by the preprocessor. */
#ifdef ENTROPY_STATS
#define BUILD_COEF_DIST(probs, counts, accum, branch_ct, types) \
  build_tree_distribution(probs, counts, cpi, accum, branch_ct, types)
#else
#define BUILD_COEF_DIST(probs, counts, accum, branch_ct, types) \
  build_tree_distribution(probs, counts, branch_ct, types)
#endif

/* Builds the per-frame coefficient probabilities and branch counts from the
 * counts stored in cpi, once for each coefficient table: 4x4, 8x8 and 16x16
 * (each with its hybrid variant) and 32x32. */
static void build_coeff_contexts(VP9_COMP *cpi) {
  /* 4x4 */
  BUILD_COEF_DIST(cpi->frame_coef_probs_4x4,
                  cpi->coef_counts_4x4,
                  context_counters_4x4,
                  cpi->frame_branch_ct_4x4, BLOCK_TYPES_4X4);
  BUILD_COEF_DIST(cpi->frame_hybrid_coef_probs_4x4,
                  cpi->hybrid_coef_counts_4x4,
                  hybrid_context_counters_4x4,
                  cpi->frame_hybrid_branch_ct_4x4, BLOCK_TYPES_4X4_HYBRID);

  /* 8x8 */
  BUILD_COEF_DIST(cpi->frame_coef_probs_8x8,
                  cpi->coef_counts_8x8,
                  context_counters_8x8,
                  cpi->frame_branch_ct_8x8, BLOCK_TYPES_8X8);
  BUILD_COEF_DIST(cpi->frame_hybrid_coef_probs_8x8,
                  cpi->hybrid_coef_counts_8x8,
                  hybrid_context_counters_8x8,
                  cpi->frame_hybrid_branch_ct_8x8, BLOCK_TYPES_8X8_HYBRID);

  /* 16x16 */
  BUILD_COEF_DIST(cpi->frame_coef_probs_16x16,
                  cpi->coef_counts_16x16,
                  context_counters_16x16,
                  cpi->frame_branch_ct_16x16, BLOCK_TYPES_16X16);
  BUILD_COEF_DIST(cpi->frame_hybrid_coef_probs_16x16,
                  cpi->hybrid_coef_counts_16x16,
                  hybrid_context_counters_16x16,
                  cpi->frame_hybrid_branch_ct_16x16, BLOCK_TYPES_16X16_HYBRID);

  /* 32x32 */
  BUILD_COEF_DIST(cpi->frame_coef_probs_32x32,
                  cpi->coef_counts_32x32,
                  context_counters_32x32,
                  cpi->frame_branch_ct_32x32, BLOCK_TYPES_32X32);
}

#undef BUILD_COEF_DIST
|
|
|
|
|
2012-12-08 01:09:59 +01:00
|
|
|
static void update_coef_probs_common(vp9_writer* const bc,
|
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
VP9_COMP *cpi,
|
|
|
|
vp9_coeff_stats *tree_update_hist,
|
|
|
|
#endif
|
|
|
|
vp9_coeff_probs *new_frame_coef_probs,
|
|
|
|
vp9_coeff_probs *old_frame_coef_probs,
|
|
|
|
vp9_coeff_stats *frame_branch_ct,
|
|
|
|
int block_types) {
|
2012-08-03 02:03:14 +02:00
|
|
|
int i, j, k, t;
|
2012-07-14 00:21:29 +02:00
|
|
|
int update[2] = {0, 0};
|
|
|
|
int savings;
|
2012-10-31 22:40:53 +01:00
|
|
|
// vp9_prob bestupd = find_coef_update_prob(cpi);
|
2012-04-12 18:24:03 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/* dry run to see if there is any udpate at all needed */
|
|
|
|
savings = 0;
|
2012-12-08 01:09:59 +01:00
|
|
|
for (i = 0; i < block_types; ++i) {
|
2013-02-15 21:09:05 +01:00
|
|
|
for (j = 0; j < COEF_BANDS; ++j) {
|
2012-07-14 00:21:29 +02:00
|
|
|
int prev_coef_savings[ENTROPY_NODES] = {0};
|
2012-08-03 02:03:14 +02:00
|
|
|
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
|
|
|
|
for (t = 0; t < ENTROPY_NODES; ++t) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_prob newp = new_frame_coef_probs[i][j][k][t];
|
|
|
|
const vp9_prob oldp = old_frame_coef_probs[i][j][k][t];
|
|
|
|
const vp9_prob upd = COEF_UPDATE_PROB;
|
2012-07-14 00:21:29 +02:00
|
|
|
int s = prev_coef_savings[t];
|
|
|
|
int u = 0;
|
2013-02-15 21:09:05 +01:00
|
|
|
if (k >= 3 && j == 0)
|
2012-07-14 00:21:29 +02:00
|
|
|
continue;
|
2012-07-27 20:29:46 +02:00
|
|
|
#if defined(SEARCH_NEWP)
|
2012-07-14 00:21:29 +02:00
|
|
|
s = prob_diff_update_savings_search(
|
2012-10-20 00:35:36 +02:00
|
|
|
frame_branch_ct[i][j][k][t],
|
|
|
|
oldp, &newp, upd);
|
|
|
|
if (s > 0 && newp != oldp)
|
2012-07-14 00:21:29 +02:00
|
|
|
u = 1;
|
|
|
|
if (u)
|
2012-10-31 22:40:53 +01:00
|
|
|
savings += s - (int)(vp9_cost_zero(upd));
|
2012-07-14 00:21:29 +02:00
|
|
|
else
|
2012-10-31 22:40:53 +01:00
|
|
|
savings -= (int)(vp9_cost_zero(upd));
|
2012-04-12 18:24:03 +02:00
|
|
|
#else
|
2012-07-14 00:21:29 +02:00
|
|
|
s = prob_update_savings(
|
2012-10-20 00:35:36 +02:00
|
|
|
frame_branch_ct[i][j][k][t],
|
|
|
|
oldp, newp, upd);
|
2012-07-14 00:21:29 +02:00
|
|
|
if (s > 0)
|
|
|
|
u = 1;
|
|
|
|
if (u)
|
|
|
|
savings += s;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
update[u]++;
|
2012-08-03 02:03:14 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
// printf("Update %d %d, savings %d\n", update[0], update[1], savings);
|
|
|
|
/* Is coef updated at all */
|
2012-10-18 01:47:38 +02:00
|
|
|
if (update[1] == 0 || savings < 0) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(bc, 0);
|
2012-10-18 01:47:38 +02:00
|
|
|
} else {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(bc, 1);
|
2012-12-08 01:09:59 +01:00
|
|
|
for (i = 0; i < block_types; ++i) {
|
2013-02-15 21:09:05 +01:00
|
|
|
for (j = 0; j < COEF_BANDS; ++j) {
|
2012-07-14 00:21:29 +02:00
|
|
|
int prev_coef_savings[ENTROPY_NODES] = {0};
|
2012-08-03 02:03:14 +02:00
|
|
|
for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
|
2012-07-14 00:21:29 +02:00
|
|
|
// calc probs and branch cts for this frame only
|
2012-08-03 02:03:14 +02:00
|
|
|
for (t = 0; t < ENTROPY_NODES; ++t) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_prob newp = new_frame_coef_probs[i][j][k][t];
|
|
|
|
vp9_prob *oldp = old_frame_coef_probs[i][j][k] + t;
|
|
|
|
const vp9_prob upd = COEF_UPDATE_PROB;
|
2012-07-14 00:21:29 +02:00
|
|
|
int s = prev_coef_savings[t];
|
|
|
|
int u = 0;
|
2013-02-15 21:09:05 +01:00
|
|
|
if (k >= 3 && j == 0)
|
2012-07-14 00:21:29 +02:00
|
|
|
continue;
|
2012-02-26 02:15:47 +01:00
|
|
|
|
2012-07-27 20:29:46 +02:00
|
|
|
#if defined(SEARCH_NEWP)
|
2012-07-14 00:21:29 +02:00
|
|
|
s = prob_diff_update_savings_search(
|
2012-10-20 00:35:36 +02:00
|
|
|
frame_branch_ct[i][j][k][t],
|
|
|
|
*oldp, &newp, upd);
|
|
|
|
if (s > 0 && newp != *oldp)
|
2012-07-14 00:21:29 +02:00
|
|
|
u = 1;
|
2012-04-12 18:24:03 +02:00
|
|
|
#else
|
2012-07-14 00:21:29 +02:00
|
|
|
s = prob_update_savings(
|
2012-10-20 00:35:36 +02:00
|
|
|
frame_branch_ct[i][j][k][t],
|
|
|
|
*oldp, newp, upd);
|
2012-07-14 00:21:29 +02:00
|
|
|
if (s > 0)
|
|
|
|
u = 1;
|
2012-04-12 18:24:03 +02:00
|
|
|
#endif
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(bc, u, upd);
|
2012-02-07 00:10:13 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
2012-07-14 00:21:29 +02:00
|
|
|
if (!cpi->dummy_packing)
|
2012-12-08 01:09:59 +01:00
|
|
|
++tree_update_hist[i][j][k][t][u];
|
2010-05-18 17:58:33 +02:00
|
|
|
#endif
|
2012-07-14 00:21:29 +02:00
|
|
|
if (u) {
|
|
|
|
/* send/use new probability */
|
2012-10-20 00:35:36 +02:00
|
|
|
write_prob_diff_update(bc, newp, *oldp);
|
|
|
|
*oldp = newp;
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-08-03 02:03:14 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2012-10-20 00:35:36 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) {
|
|
|
|
vp9_clear_system_state();
|
2012-09-10 07:42:35 +02:00
|
|
|
|
2012-10-20 00:35:36 +02:00
|
|
|
// Build the coefficient contexts based on counts collected in encode loop
|
|
|
|
build_coeff_contexts(cpi);
|
2012-09-10 07:42:35 +02:00
|
|
|
|
2012-10-20 00:35:36 +02:00
|
|
|
update_coef_probs_common(bc,
|
2012-12-08 01:09:59 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
cpi,
|
|
|
|
tree_update_hist_4x4,
|
|
|
|
#endif
|
|
|
|
cpi->frame_coef_probs_4x4,
|
|
|
|
cpi->common.fc.coef_probs_4x4,
|
|
|
|
cpi->frame_branch_ct_4x4,
|
|
|
|
BLOCK_TYPES_4X4);
|
2012-09-10 07:42:35 +02:00
|
|
|
|
2012-10-20 00:35:36 +02:00
|
|
|
update_coef_probs_common(bc,
|
2012-12-08 01:09:59 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
cpi,
|
|
|
|
hybrid_tree_update_hist_4x4,
|
|
|
|
#endif
|
|
|
|
cpi->frame_hybrid_coef_probs_4x4,
|
|
|
|
cpi->common.fc.hybrid_coef_probs_4x4,
|
|
|
|
cpi->frame_hybrid_branch_ct_4x4,
|
2013-02-15 21:09:05 +01:00
|
|
|
BLOCK_TYPES_4X4_HYBRID);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2012-08-03 02:03:14 +02:00
|
|
|
/* do not do this if not even allowed */
|
2012-10-09 18:18:21 +02:00
|
|
|
if (cpi->common.txfm_mode != ONLY_4X4) {
|
2012-10-20 00:35:36 +02:00
|
|
|
update_coef_probs_common(bc,
|
2012-12-08 01:09:59 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
cpi,
|
|
|
|
tree_update_hist_8x8,
|
|
|
|
#endif
|
2012-10-20 00:35:36 +02:00
|
|
|
cpi->frame_coef_probs_8x8,
|
|
|
|
cpi->common.fc.coef_probs_8x8,
|
2012-12-08 01:09:59 +01:00
|
|
|
cpi->frame_branch_ct_8x8,
|
|
|
|
BLOCK_TYPES_8X8);
|
2012-10-20 00:35:36 +02:00
|
|
|
|
|
|
|
update_coef_probs_common(bc,
|
2012-12-08 01:09:59 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
cpi,
|
|
|
|
hybrid_tree_update_hist_8x8,
|
|
|
|
#endif
|
2012-10-20 00:35:36 +02:00
|
|
|
cpi->frame_hybrid_coef_probs_8x8,
|
|
|
|
cpi->common.fc.hybrid_coef_probs_8x8,
|
2012-12-08 01:09:59 +01:00
|
|
|
cpi->frame_hybrid_branch_ct_8x8,
|
2013-02-15 21:09:05 +01:00
|
|
|
BLOCK_TYPES_8X8_HYBRID);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2012-08-03 02:03:14 +02:00
|
|
|
|
2012-10-09 18:18:21 +02:00
|
|
|
if (cpi->common.txfm_mode > ALLOW_8X8) {
|
2012-10-20 00:35:36 +02:00
|
|
|
update_coef_probs_common(bc,
|
2012-12-08 01:09:59 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
cpi,
|
|
|
|
tree_update_hist_16x16,
|
|
|
|
#endif
|
2012-10-20 00:35:36 +02:00
|
|
|
cpi->frame_coef_probs_16x16,
|
|
|
|
cpi->common.fc.coef_probs_16x16,
|
2012-12-08 01:09:59 +01:00
|
|
|
cpi->frame_branch_ct_16x16,
|
|
|
|
BLOCK_TYPES_16X16);
|
2012-10-20 00:35:36 +02:00
|
|
|
update_coef_probs_common(bc,
|
2012-12-08 01:09:59 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
cpi,
|
|
|
|
hybrid_tree_update_hist_16x16,
|
|
|
|
#endif
|
2012-10-20 00:35:36 +02:00
|
|
|
cpi->frame_hybrid_coef_probs_16x16,
|
|
|
|
cpi->common.fc.hybrid_coef_probs_16x16,
|
2012-12-08 01:09:59 +01:00
|
|
|
cpi->frame_hybrid_branch_ct_16x16,
|
2013-02-15 21:09:05 +01:00
|
|
|
BLOCK_TYPES_16X16_HYBRID);
|
2012-10-09 18:18:21 +02:00
|
|
|
}
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
|
|
|
|
if (cpi->common.txfm_mode > ALLOW_16X16) {
|
|
|
|
update_coef_probs_common(bc,
|
2012-12-08 01:09:59 +01:00
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
cpi,
|
|
|
|
tree_update_hist_32x32,
|
|
|
|
#endif
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT are double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
cpi->frame_coef_probs_32x32,
|
|
|
|
cpi->common.fc.coef_probs_32x32,
|
2012-12-08 01:09:59 +01:00
|
|
|
cpi->frame_branch_ct_32x32,
|
|
|
|
BLOCK_TYPES_32X32);
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT are double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-04-12 18:24:03 +02:00
|
|
|
|
2010-05-18 17:58:33 +02:00
|
|
|
/* File-scope FILE handle that exists only in builds where PACKET_TESTING is
 * defined; it starts out as a null pointer.
 * NOTE(review): where it is opened and what is written to it is not visible
 * in this part of the file - confirm before relying on it. */
#ifdef PACKET_TESTING
|
|
|
|
FILE *vpxlogc = 0;
|
|
|
|
#endif
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
/* Emits an optional quantizer delta through the writer `bc`.
 *
 * Layout written:
 *   - one flag bit: 1 when delta_q is non-zero, 0 otherwise;
 *   - only when the flag is 1: abs(delta_q) as a 4-bit literal, followed by
 *     one sign bit (1 for a negative delta, 0 for a positive one).
 */
static void put_delta_q(vp9_writer *bc, int delta_q) {
  int is_negative;

  if (delta_q == 0) {
    /* Nothing to send beyond the "no delta" flag. */
    vp9_write_bit(bc, 0);
    return;
  }

  is_negative = (delta_q < 0);

  vp9_write_bit(bc, 1);
  vp9_write_literal(bc, abs(delta_q), 4);
  vp9_write_bit(bc, is_negative);
}
|
2012-08-20 23:43:34 +02:00
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
/* Chooses which of the 8 key-frame Y-mode probability sets to use.
 *
 * For every candidate set the token costs of both the regular Y-mode tree
 * and the superblock Y-mode tree are weighted by the mode counts stored in
 * `cpi` (ymode_count and sb_ymode_count) and summed. The index of the set
 * with the smallest total is stored in cpi->common.kf_ymode_probs_index;
 * on a tie the lowest index wins because only a strictly smaller total
 * replaces the current best.
 */
static void decide_kf_ymode_entropy(VP9_COMP *cpi) {
  int token_cost[MB_MODE_COUNT];  /* scratch, refilled for each tree */
  int lowest_total = INT_MAX;
  int chosen_set = 0;
  int set;

  for (set = 0; set < 8; ++set) {
    int total = 0;
    int mode;

    /* Contribution of the regular Y modes. */
    vp9_cost_tokens(token_cost, cpi->common.kf_ymode_prob[set],
                    vp9_kf_ymode_tree);
    for (mode = 0; mode < VP9_YMODES; ++mode) {
      total += token_cost[mode] * cpi->ymode_count[mode];
    }

    /* Contribution of the superblock Y modes (reuses the scratch array). */
    vp9_cost_tokens(token_cost, cpi->common.sb_kf_ymode_prob[set],
                    vp9_sb_ymode_tree);
    for (mode = 0; mode < VP9_I32X32_MODES; ++mode) {
      total += token_cost[mode] * cpi->sb_ymode_count[mode];
    }

    if (total < lowest_total) {
      lowest_total = total;
      chosen_set = set;
    }
  }

  cpi->common.kf_ymode_probs_index = chosen_set;
}
|
2012-10-31 01:53:32 +01:00
|
|
|
/* Builds, for every segment id, a bit mask of the reference frames used by
 * the macroblocks assigned to that segment (bit `ref_frame` is set), then
 * enables SEG_LVL_REF_FRAME on every segment and stores the mask as that
 * feature's data.
 */
static void segment_reference_frames(VP9_COMP *cpi) {
  VP9_COMMON *oci = &cpi->common;
  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
  MODE_INFO *mi = oci->mi;
  int ref[MAX_MB_SEGMENTS] = {0};
  /* Each row of the mode-info array holds mb_cols entries plus one extra
   * entry that is stepped over (presumably a border column - confirm). */
  const int row_stride = oci->mb_cols + 1;
  int row, col, seg;

  for (row = 0; row < oci->mb_rows; ++row) {
    for (col = 0; col < oci->mb_cols; ++col) {
      const int idx = row * row_stride + col;

      ref[mi[idx].mbmi.segment_id] |= (1 << mi[idx].mbmi.ref_frame);
    }
  }

  for (seg = 0; seg < MAX_MB_SEGMENTS; ++seg) {
    vp9_enable_segfeature(xd, seg, SEG_LVL_REF_FRAME);
    vp9_set_segdata(xd, seg, SEG_LVL_REF_FRAME, ref[seg]);
  }
}
|
2012-03-19 19:02:04 +01:00
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
|
2012-10-30 22:25:33 +01:00
|
|
|
unsigned long *size) {
|
2012-07-14 00:21:29 +02:00
|
|
|
int i, j;
|
2012-10-31 22:40:53 +01:00
|
|
|
VP9_HEADER oh;
|
2012-10-31 01:53:32 +01:00
|
|
|
VP9_COMMON *const pc = &cpi->common;
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_writer header_bc, residual_bc;
|
2012-10-17 23:51:27 +02:00
|
|
|
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
|
2012-07-14 00:21:29 +02:00
|
|
|
int extra_bytes_packed = 0;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
unsigned char *cx_data = dest;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
oh.show_frame = (int) pc->show_frame;
|
|
|
|
oh.type = (int)pc->frame_type;
|
|
|
|
oh.version = pc->version;
|
|
|
|
oh.first_partition_length_in_bytes = 0;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
cx_data += 3;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
|
|
|
#if defined(SECTIONBITS_OUTPUT)
|
2012-10-31 22:40:53 +01:00
|
|
|
Sectionbits[active_section = 1] += sizeof(VP9_HEADER) * 8 * 256;
|
2010-05-18 17:58:33 +02:00
|
|
|
#endif
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
compute_update_table();
|
2012-04-12 18:24:03 +02:00
|
|
|
|
2012-10-31 00:25:53 +01:00
|
|
|
/* vp9_kf_default_bmode_probs() is called in vp9_setup_key_frame() once
|
2012-10-30 22:25:33 +01:00
|
|
|
* for each K frame before encode frame. pc->kf_bmode_prob doesn't get
|
|
|
|
* changed anywhere else. No need to call it again here. --yw
|
2012-10-31 00:25:53 +01:00
|
|
|
* vp9_kf_default_bmode_probs( pc->kf_bmode_prob);
|
2012-10-30 22:25:33 +01:00
|
|
|
*/
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-10-30 22:25:33 +01:00
|
|
|
/* every keyframe send startcode, width, height, scale factor, clamp
|
|
|
|
* and color type.
|
|
|
|
*/
|
2012-07-14 00:21:29 +02:00
|
|
|
if (oh.type == KEY_FRAME) {
|
|
|
|
// Start / synch code
|
|
|
|
cx_data[0] = 0x9D;
|
|
|
|
cx_data[1] = 0x01;
|
|
|
|
cx_data[2] = 0x2a;
|
2013-02-07 00:54:52 +01:00
|
|
|
extra_bytes_packed = 3;
|
|
|
|
cx_data += extra_bytes_packed;
|
|
|
|
}
|
|
|
|
{
|
|
|
|
int v;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-02-07 00:54:52 +01:00
|
|
|
/* TODO(jkoleszar): support arbitrary resolutions */
|
2012-07-14 00:21:29 +02:00
|
|
|
v = (pc->horiz_scale << 14) | pc->Width;
|
2013-02-07 00:54:52 +01:00
|
|
|
cx_data[0] = v;
|
|
|
|
cx_data[1] = v >> 8;
|
2011-02-17 12:47:39 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
v = (pc->vert_scale << 14) | pc->Height;
|
2013-02-07 00:54:52 +01:00
|
|
|
cx_data[2] = v;
|
|
|
|
cx_data[3] = v >> 8;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-02-07 00:54:52 +01:00
|
|
|
extra_bytes_packed += 4;
|
|
|
|
cx_data += 4;
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-02-07 00:54:52 +01:00
|
|
|
vp9_start_encode(&header_bc, cx_data);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-02-07 00:54:52 +01:00
|
|
|
// TODO(jkoleszar): remove these two unused bits?
|
|
|
|
vp9_write_bit(&header_bc, pc->clr_type);
|
|
|
|
vp9_write_bit(&header_bc, pc->clamp_type);
|
2011-11-08 16:40:32 +01:00
|
|
|
|
2013-01-15 15:43:35 +01:00
|
|
|
// error resilient mode
|
|
|
|
vp9_write_bit(&header_bc, pc->error_resilient_mode);
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Signal whether or not Segmentation is enabled
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, (xd->segmentation_enabled) ? 1 : 0);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Indicate which features are enabled
|
|
|
|
if (xd->segmentation_enabled) {
|
|
|
|
// Indicate whether or not the segmentation map is being updated.
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, (xd->update_mb_segmentation_map) ? 1 : 0);
|
2011-11-15 17:15:23 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// If it is, then indicate the method that will be used.
|
|
|
|
if (xd->update_mb_segmentation_map) {
|
|
|
|
// Select the coding strategy (temporal or spatial)
|
2012-10-30 05:39:44 +01:00
|
|
|
vp9_choose_segmap_coding_method(cpi);
|
2012-10-12 07:15:33 +02:00
|
|
|
// Send the tree probabilities used to decode unpredicted
|
|
|
|
// macro-block segments
|
|
|
|
for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) {
|
|
|
|
int data = xd->mb_segment_tree_probs[i];
|
|
|
|
|
|
|
|
if (data != 255) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 1);
|
|
|
|
vp9_write_literal(&header_bc, data, 8);
|
2012-10-12 07:15:33 +02:00
|
|
|
} else {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 0);
|
2012-10-12 07:15:33 +02:00
|
|
|
}
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Write out the chosen coding method.
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, (pc->temporal_update) ? 1 : 0);
|
2012-10-12 07:15:33 +02:00
|
|
|
if (pc->temporal_update) {
|
|
|
|
for (i = 0; i < PREDICTION_PROBS; i++) {
|
|
|
|
int data = pc->segment_pred_probs[i];
|
|
|
|
|
|
|
|
if (data != 255) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 1);
|
|
|
|
vp9_write_literal(&header_bc, data, 8);
|
2012-10-12 07:15:33 +02:00
|
|
|
} else {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 0);
|
2012-10-12 07:15:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, (xd->update_mb_segmentation_data) ? 1 : 0);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// segment_reference_frames(cpi);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
if (xd->update_mb_segmentation_data) {
|
|
|
|
signed char Data;
|
2012-02-13 17:21:24 +01:00
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, (xd->mb_segment_abs_delta) ? 1 : 0);
|
2012-02-13 17:21:24 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// For each segments id...
|
|
|
|
for (i = 0; i < MAX_MB_SEGMENTS; i++) {
|
|
|
|
// For each segmentation codable feature...
|
|
|
|
for (j = 0; j < SEG_LVL_MAX; j++) {
|
2012-10-30 06:15:27 +01:00
|
|
|
Data = vp9_get_segdata(xd, i, j);
|
2012-02-13 17:21:24 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// If the feature is enabled...
|
2012-10-30 06:15:27 +01:00
|
|
|
if (vp9_segfeature_active(xd, i, j)) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 1);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
// Is the segment data signed..
|
2012-10-30 06:15:27 +01:00
|
|
|
if (vp9_is_segfeature_signed(j)) {
|
2012-07-14 00:21:29 +02:00
|
|
|
// Encode the relevant feature data
|
|
|
|
if (Data < 0) {
|
|
|
|
Data = - Data;
|
2012-11-27 20:16:15 +01:00
|
|
|
vp9_encode_unsigned_max(&header_bc, Data,
|
|
|
|
vp9_seg_feature_data_max(j));
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 1);
|
2012-07-14 00:21:29 +02:00
|
|
|
} else {
|
2012-11-27 20:16:15 +01:00
|
|
|
vp9_encode_unsigned_max(&header_bc, Data,
|
|
|
|
vp9_seg_feature_data_max(j));
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 0);
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2012-01-28 13:20:14 +01:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
// Unsigned data element so no sign bit needed
|
2012-01-28 13:20:14 +01:00
|
|
|
else
|
2012-11-27 20:16:15 +01:00
|
|
|
vp9_encode_unsigned_max(&header_bc, Data,
|
|
|
|
vp9_seg_feature_data_max(j));
|
2012-07-14 00:21:29 +02:00
|
|
|
} else
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 0);
|
2012-01-28 13:20:14 +01:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2012-01-28 13:20:14 +01:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Encode the common prediction model status flag probability updates for
|
|
|
|
// the reference frame
|
|
|
|
update_refpred_stats(cpi);
|
|
|
|
if (pc->frame_type != KEY_FRAME) {
|
|
|
|
for (i = 0; i < PREDICTION_PROBS; i++) {
|
|
|
|
if (cpi->ref_pred_probs_update[i]) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 1);
|
|
|
|
vp9_write_literal(&header_bc, pc->ref_pred_probs[i], 8);
|
2012-10-18 01:47:38 +02:00
|
|
|
} else {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 0);
|
2012-10-18 01:47:38 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
pc->sb64_coded = get_binary_prob(cpi->sb64_count[0], cpi->sb64_count[1]);
|
|
|
|
vp9_write_literal(&header_bc, pc->sb64_coded, 8);
|
|
|
|
pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]);
|
|
|
|
vp9_write_literal(&header_bc, pc->sb32_coded, 8);
|
2013-02-12 06:14:46 +01:00
|
|
|
#if CONFIG_LOSSLESS
|
|
|
|
vp9_write_bit(&header_bc, cpi->oxcf.lossless);
|
|
|
|
if (cpi->oxcf.lossless) {
|
|
|
|
pc->txfm_mode = ONLY_4X4;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
#endif
|
2012-10-09 18:18:21 +02:00
|
|
|
{
|
2012-10-11 02:18:22 +02:00
|
|
|
if (pc->txfm_mode == TX_MODE_SELECT) {
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT are double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] +
|
|
|
|
cpi->txfm_count_16x16p[TX_4X4] +
|
|
|
|
cpi->txfm_count_8x8p[TX_4X4],
|
|
|
|
cpi->txfm_count_32x32p[TX_4X4] +
|
|
|
|
cpi->txfm_count_32x32p[TX_8X8] +
|
|
|
|
cpi->txfm_count_32x32p[TX_16X16] +
|
|
|
|
cpi->txfm_count_32x32p[TX_32X32] +
|
|
|
|
cpi->txfm_count_16x16p[TX_4X4] +
|
|
|
|
cpi->txfm_count_16x16p[TX_8X8] +
|
|
|
|
cpi->txfm_count_16x16p[TX_16X16] +
|
|
|
|
cpi->txfm_count_8x8p[TX_4X4] +
|
|
|
|
cpi->txfm_count_8x8p[TX_8X8]);
|
|
|
|
pc->prob_tx[1] = get_prob(cpi->txfm_count_32x32p[TX_8X8] +
|
|
|
|
cpi->txfm_count_16x16p[TX_8X8],
|
|
|
|
cpi->txfm_count_32x32p[TX_8X8] +
|
|
|
|
cpi->txfm_count_32x32p[TX_16X16] +
|
|
|
|
cpi->txfm_count_32x32p[TX_32X32] +
|
|
|
|
cpi->txfm_count_16x16p[TX_8X8] +
|
|
|
|
cpi->txfm_count_16x16p[TX_16X16]);
|
|
|
|
pc->prob_tx[2] = get_prob(cpi->txfm_count_32x32p[TX_16X16],
|
|
|
|
cpi->txfm_count_32x32p[TX_16X16] +
|
|
|
|
cpi->txfm_count_32x32p[TX_32X32]);
|
2012-10-09 18:18:21 +02:00
|
|
|
} else {
|
|
|
|
pc->prob_tx[0] = 128;
|
|
|
|
pc->prob_tx[1] = 128;
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT are double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
pc->prob_tx[2] = 128;
|
|
|
|
}
|
|
|
|
vp9_write_literal(&header_bc, pc->txfm_mode <= 3 ? pc->txfm_mode : 3, 2);
|
|
|
|
if (pc->txfm_mode > ALLOW_16X16) {
|
|
|
|
vp9_write_bit(&header_bc, pc->txfm_mode == TX_MODE_SELECT);
|
2012-10-09 18:18:21 +02:00
|
|
|
}
|
|
|
|
if (pc->txfm_mode == TX_MODE_SELECT) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(&header_bc, pc->prob_tx[0], 8);
|
|
|
|
vp9_write_literal(&header_bc, pc->prob_tx[1], 8);
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT are double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
vp9_write_literal(&header_bc, pc->prob_tx[2], 8);
|
2012-10-09 18:18:21 +02:00
|
|
|
}
|
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
// Encode the loop filter level and type
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, pc->filter_type);
|
|
|
|
vp9_write_literal(&header_bc, pc->filter_level, 6);
|
|
|
|
vp9_write_literal(&header_bc, pc->sharpness_level, 3);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
// Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled).
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, (xd->mode_ref_lf_delta_enabled) ? 1 : 0);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
if (xd->mode_ref_lf_delta_enabled) {
|
|
|
|
// Do the deltas need to be updated
|
|
|
|
int send_update = xd->mode_ref_lf_delta_update;
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, send_update);
|
2012-07-14 00:21:29 +02:00
|
|
|
if (send_update) {
|
|
|
|
int Data;
|
|
|
|
|
|
|
|
// Send update
|
|
|
|
for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
|
|
|
|
Data = xd->ref_lf_deltas[i];
|
|
|
|
|
|
|
|
// Frame level data
|
|
|
|
if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i]) {
|
|
|
|
xd->last_ref_lf_deltas[i] = xd->ref_lf_deltas[i];
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 1);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
if (Data > 0) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(&header_bc, (Data & 0x3F), 6);
|
|
|
|
vp9_write_bit(&header_bc, 0); // sign
|
2012-07-14 00:21:29 +02:00
|
|
|
} else {
|
|
|
|
Data = -Data;
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(&header_bc, (Data & 0x3F), 6);
|
|
|
|
vp9_write_bit(&header_bc, 1); // sign
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2012-10-18 01:47:38 +02:00
|
|
|
} else {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 0);
|
2012-10-18 01:47:38 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Send update
|
|
|
|
for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
|
|
|
|
Data = xd->mode_lf_deltas[i];
|
|
|
|
|
|
|
|
if (xd->mode_lf_deltas[i] != xd->last_mode_lf_deltas[i]) {
|
|
|
|
xd->last_mode_lf_deltas[i] = xd->mode_lf_deltas[i];
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 1);
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
if (Data > 0) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(&header_bc, (Data & 0x3F), 6);
|
|
|
|
vp9_write_bit(&header_bc, 0); // sign
|
2012-07-14 00:21:29 +02:00
|
|
|
} else {
|
|
|
|
Data = -Data;
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(&header_bc, (Data & 0x3F), 6);
|
|
|
|
vp9_write_bit(&header_bc, 1); // sign
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2012-10-18 01:47:38 +02:00
|
|
|
} else {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, 0);
|
2012-10-18 01:47:38 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// signal here is multi token partition is enabled
|
2012-10-31 22:40:53 +01:00
|
|
|
// vp9_write_literal(&header_bc, pc->multi_token_partition, 2);
|
|
|
|
vp9_write_literal(&header_bc, 0, 2);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Frame Q baseline quantizer index
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(&header_bc, pc->base_qindex, QINDEX_BITS);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Transmit Dc, Second order and Uv quantizer delta information
|
2012-10-18 01:47:38 +02:00
|
|
|
put_delta_q(&header_bc, pc->y1dc_delta_q);
|
|
|
|
put_delta_q(&header_bc, pc->uvdc_delta_q);
|
|
|
|
put_delta_q(&header_bc, pc->uvac_delta_q);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// When there is a key frame all reference buffers are updated using the new key frame
|
|
|
|
if (pc->frame_type != KEY_FRAME) {
|
2013-01-16 21:19:42 +01:00
|
|
|
int refresh_mask;
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Should the GF or ARF be updated using the transmitted frame or buffer
|
2013-01-16 21:19:42 +01:00
|
|
|
if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
|
|
|
|
/* Preserve the previously existing golden frame and update the frame in
|
|
|
|
* the alt ref slot instead. This is highly specific to the use of
|
|
|
|
* alt-ref as a forward reference, and this needs to be generalized as
|
|
|
|
* other uses are implemented (like RTC/temporal scaling)
|
|
|
|
*
|
|
|
|
* gld_fb_idx and alt_fb_idx need to be swapped for future frames, but
|
|
|
|
* that happens in vp9_onyx_if.c:update_reference_frames() so that it can
|
|
|
|
* be done outside of the recode loop.
|
|
|
|
*/
|
|
|
|
refresh_mask = (cpi->refresh_last_frame << cpi->lst_fb_idx) |
|
|
|
|
(cpi->refresh_golden_frame << cpi->alt_fb_idx);
|
|
|
|
} else {
|
|
|
|
refresh_mask = (cpi->refresh_last_frame << cpi->lst_fb_idx) |
|
|
|
|
(cpi->refresh_golden_frame << cpi->gld_fb_idx) |
|
|
|
|
(cpi->refresh_alt_ref_frame << cpi->alt_fb_idx);
|
|
|
|
}
|
|
|
|
vp9_write_literal(&header_bc, refresh_mask, NUM_REF_FRAMES);
|
|
|
|
vp9_write_literal(&header_bc, cpi->lst_fb_idx, NUM_REF_FRAMES_LG2);
|
|
|
|
vp9_write_literal(&header_bc, cpi->gld_fb_idx, NUM_REF_FRAMES_LG2);
|
|
|
|
vp9_write_literal(&header_bc, cpi->alt_fb_idx, NUM_REF_FRAMES_LG2);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer)
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]);
|
|
|
|
vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[ALTREF_FRAME]);
|
2012-02-16 18:29:54 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Signal whether to allow high MV precision
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, (xd->allow_high_precision_mv) ? 1 : 0);
|
2012-07-18 22:43:01 +02:00
|
|
|
if (pc->mcomp_filter_type == SWITCHABLE) {
|
|
|
|
/* Check to see if only one of the filters is actually used */
|
2012-10-31 22:40:53 +01:00
|
|
|
int count[VP9_SWITCHABLE_FILTERS];
|
2012-07-18 22:43:01 +02:00
|
|
|
int i, j, c = 0;
|
2012-10-31 22:40:53 +01:00
|
|
|
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
|
2012-07-18 22:43:01 +02:00
|
|
|
count[i] = 0;
|
2012-10-31 22:40:53 +01:00
|
|
|
for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) {
|
2012-07-18 22:43:01 +02:00
|
|
|
count[i] += cpi->switchable_interp_count[j][i];
|
|
|
|
}
|
|
|
|
c += (count[i] > 0);
|
|
|
|
}
|
|
|
|
if (c == 1) {
|
|
|
|
/* Only one filter is used. So set the filter at frame level */
|
2012-10-31 22:40:53 +01:00
|
|
|
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
|
2012-07-18 22:43:01 +02:00
|
|
|
if (count[i]) {
|
2012-10-31 01:12:12 +01:00
|
|
|
pc->mcomp_filter_type = vp9_switchable_interp[i];
|
2012-07-18 22:43:01 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
// Signal the type of subpel filter to use
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, (pc->mcomp_filter_type == SWITCHABLE));
|
2012-07-18 22:43:01 +02:00
|
|
|
if (pc->mcomp_filter_type != SWITCHABLE)
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(&header_bc, (pc->mcomp_filter_type), 2);
|
2012-11-07 15:50:25 +01:00
|
|
|
#if CONFIG_COMP_INTERINTRA_PRED
|
|
|
|
// printf("Counts: %d %d\n", cpi->interintra_count[0],
|
|
|
|
// cpi->interintra_count[1]);
|
|
|
|
if (!cpi->dummy_packing && pc->use_interintra)
|
|
|
|
pc->use_interintra = (cpi->interintra_count[1] > 0);
|
|
|
|
vp9_write_bit(&header_bc, pc->use_interintra);
|
|
|
|
if (!pc->use_interintra)
|
|
|
|
vp9_zero(cpi->interintra_count);
|
|
|
|
#endif
|
2012-07-14 00:21:29 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-01-25 20:30:28 +01:00
|
|
|
if (!pc->error_resilient_mode) {
|
|
|
|
vp9_write_bit(&header_bc, pc->refresh_entropy_probs);
|
|
|
|
vp9_write_bit(&header_bc, pc->frame_parallel_decoding_mode);
|
|
|
|
}
|
|
|
|
|
2013-01-16 00:57:11 +01:00
|
|
|
vp9_write_literal(&header_bc, pc->frame_context_idx,
|
|
|
|
NUM_FRAME_CONTEXTS_LG2);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
|
|
|
#ifdef ENTROPY_STATS
|
2012-07-14 00:21:29 +02:00
|
|
|
if (pc->frame_type == INTER_FRAME)
|
|
|
|
active_section = 0;
|
|
|
|
else
|
|
|
|
active_section = 7;
|
2010-05-18 17:58:33 +02:00
|
|
|
#endif
|
|
|
|
|
2012-11-16 17:31:32 +01:00
|
|
|
// If appropriate update the inter mode probability context and code the
|
|
|
|
// changes in the bitstream.
|
2012-12-10 13:38:48 +01:00
|
|
|
if (pc->frame_type != KEY_FRAME) {
|
2012-11-16 17:31:32 +01:00
|
|
|
int i, j;
|
|
|
|
int new_context[INTER_MODE_CONTEXTS][4];
|
2013-02-06 14:02:53 +01:00
|
|
|
if (!cpi->dummy_packing) {
|
|
|
|
update_inter_mode_probs(pc, new_context);
|
|
|
|
} else {
|
|
|
|
// In dummy pack assume context unchanged.
|
|
|
|
vpx_memcpy(new_context, pc->fc.vp9_mode_contexts,
|
|
|
|
sizeof(pc->fc.vp9_mode_contexts));
|
|
|
|
}
|
2012-11-16 17:31:32 +01:00
|
|
|
|
|
|
|
for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
|
|
|
|
for (j = 0; j < 4; j++) {
|
|
|
|
if (new_context[i][j] != pc->fc.vp9_mode_contexts[i][j]) {
|
|
|
|
vp9_write(&header_bc, 1, 252);
|
|
|
|
vp9_write_literal(&header_bc, new_context[i][j], 8);
|
|
|
|
|
|
|
|
// Only update the persistent copy if this is the "real pack"
|
|
|
|
if (!cpi->dummy_packing) {
|
|
|
|
pc->fc.vp9_mode_contexts[i][j] = new_context[i][j];
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
vp9_write(&header_bc, 0, 252);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-10 13:38:48 +01:00
|
|
|
#if CONFIG_NEW_MVREF
|
|
|
|
if ((pc->frame_type != KEY_FRAME)) {
|
|
|
|
int new_mvref_probs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES-1];
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
update_mv_ref_probs(cpi, new_mvref_probs);
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_REF_FRAMES; ++i) {
|
|
|
|
// Skip the dummy entry for intra ref frame.
|
|
|
|
if (i == INTRA_FRAME) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Encode any mandated updates to probabilities
|
|
|
|
for (j = 0; j < MAX_MV_REF_CANDIDATES - 1; ++j) {
|
|
|
|
if (new_mvref_probs[i][j] != xd->mb_mv_ref_probs[i][j]) {
|
|
|
|
vp9_write(&header_bc, 1, VP9_MVREF_UPDATE_PROB);
|
|
|
|
vp9_write_literal(&header_bc, new_mvref_probs[i][j], 8);
|
|
|
|
|
|
|
|
// Only update the persistent copy if this is the "real pack"
|
|
|
|
if (!cpi->dummy_packing) {
|
|
|
|
xd->mb_mv_ref_probs[i][j] = new_mvref_probs[i][j];
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
vp9_write(&header_bc, 0, VP9_MVREF_UPDATE_PROB);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_clear_system_state(); // __asm emms;
|
|
|
|
|
2012-12-08 01:09:59 +01:00
|
|
|
vp9_copy(cpi->common.fc.pre_coef_probs_4x4,
|
|
|
|
cpi->common.fc.coef_probs_4x4);
|
|
|
|
vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_4x4,
|
|
|
|
cpi->common.fc.hybrid_coef_probs_4x4);
|
|
|
|
vp9_copy(cpi->common.fc.pre_coef_probs_8x8,
|
|
|
|
cpi->common.fc.coef_probs_8x8);
|
|
|
|
vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8,
|
|
|
|
cpi->common.fc.hybrid_coef_probs_8x8);
|
|
|
|
vp9_copy(cpi->common.fc.pre_coef_probs_16x16,
|
|
|
|
cpi->common.fc.coef_probs_16x16);
|
|
|
|
vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16,
|
|
|
|
cpi->common.fc.hybrid_coef_probs_16x16);
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
vp9_copy(cpi->common.fc.pre_coef_probs_32x32,
|
|
|
|
cpi->common.fc.coef_probs_32x32);
|
2012-11-16 00:50:07 +01:00
|
|
|
vp9_copy(cpi->common.fc.pre_sb_ymode_prob, cpi->common.fc.sb_ymode_prob);
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob);
|
|
|
|
vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob);
|
|
|
|
vp9_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob);
|
|
|
|
vp9_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob);
|
|
|
|
vp9_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob);
|
|
|
|
vp9_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob);
|
2012-07-26 22:42:07 +02:00
|
|
|
cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc;
|
2012-11-07 15:50:25 +01:00
|
|
|
#if CONFIG_COMP_INTERINTRA_PRED
|
|
|
|
cpi->common.fc.pre_interintra_prob = cpi->common.fc.interintra_prob;
|
|
|
|
#endif
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_zero(cpi->sub_mv_ref_count);
|
|
|
|
vp9_zero(cpi->mbsplit_count);
|
|
|
|
vp9_zero(cpi->common.fc.mv_ref_ct)
|
2012-09-10 07:42:35 +02:00
|
|
|
|
2012-10-18 01:47:38 +02:00
|
|
|
update_coef_probs(cpi, &header_bc);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
|
|
|
#ifdef ENTROPY_STATS
|
2012-07-14 00:21:29 +02:00
|
|
|
active_section = 2;
|
2010-05-18 17:58:33 +02:00
|
|
|
#endif
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// Write out the mb_no_coeff_skip flag
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_bit(&header_bc, pc->mb_no_coeff_skip);
|
2012-10-17 18:38:13 +02:00
|
|
|
if (pc->mb_no_coeff_skip) {
|
|
|
|
int k;
|
|
|
|
|
2012-10-30 05:25:22 +01:00
|
|
|
vp9_update_skip_probs(cpi);
|
2012-10-17 18:38:13 +02:00
|
|
|
for (k = 0; k < MBSKIP_CONTEXTS; ++k)
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(&header_bc, pc->mbskip_pred_probs[k], 8);
|
2012-10-17 18:38:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (pc->frame_type == KEY_FRAME) {
|
|
|
|
if (!pc->kf_ymode_probs_update) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(&header_bc, pc->kf_ymode_probs_index, 3);
|
2012-10-17 18:38:13 +02:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Update the probabilities used to encode reference frame data
|
|
|
|
update_ref_probs(cpi);
|
|
|
|
|
|
|
|
#ifdef ENTROPY_STATS
|
|
|
|
active_section = 1;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (pc->mcomp_filter_type == SWITCHABLE)
|
2012-10-18 01:47:38 +02:00
|
|
|
update_switchable_interp_probs(cpi, &header_bc);
|
2012-11-30 16:29:43 +01:00
|
|
|
|
2013-01-15 15:43:35 +01:00
|
|
|
#if CONFIG_COMP_INTERINTRA_PRED
|
2012-11-07 15:50:25 +01:00
|
|
|
if (pc->use_interintra) {
|
|
|
|
vp9_cond_prob_update(&header_bc,
|
|
|
|
&pc->fc.interintra_prob,
|
|
|
|
VP9_UPD_INTERINTRA_PROB,
|
|
|
|
cpi->interintra_count);
|
|
|
|
}
|
|
|
|
#endif
|
2012-10-17 18:38:13 +02:00
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(&header_bc, pc->prob_intra_coded, 8);
|
|
|
|
vp9_write_literal(&header_bc, pc->prob_last_coded, 8);
|
|
|
|
vp9_write_literal(&header_bc, pc->prob_gf_coded, 8);
|
2012-10-17 18:38:13 +02:00
|
|
|
|
|
|
|
{
|
|
|
|
const int comp_pred_mode = cpi->common.comp_pred_mode;
|
|
|
|
const int use_compound_pred = (comp_pred_mode != SINGLE_PREDICTION_ONLY);
|
|
|
|
const int use_hybrid_pred = (comp_pred_mode == HYBRID_PREDICTION);
|
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(&header_bc, use_compound_pred, 128);
|
2012-10-17 18:38:13 +02:00
|
|
|
if (use_compound_pred) {
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write(&header_bc, use_hybrid_pred, 128);
|
2012-10-17 18:38:13 +02:00
|
|
|
if (use_hybrid_pred) {
|
|
|
|
for (i = 0; i < COMP_PRED_CONTEXTS; i++) {
|
2012-10-19 01:27:30 +02:00
|
|
|
pc->prob_comppred[i] = get_binary_prob(cpi->single_pred_count[i],
|
|
|
|
cpi->comp_pred_count[i]);
|
2012-10-31 22:40:53 +01:00
|
|
|
vp9_write_literal(&header_bc, pc->prob_comppred[i], 8);
|
2012-10-17 18:38:13 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-10-18 01:47:38 +02:00
|
|
|
update_mbintra_mode_probs(cpi, &header_bc);
|
2012-10-17 18:38:13 +02:00
|
|
|
|
2012-11-09 19:52:08 +01:00
|
|
|
vp9_write_nmv_probs(cpi, xd->allow_high_precision_mv, &header_bc);
|
2012-10-17 18:38:13 +02:00
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
/* tiling */
|
2013-02-07 00:30:21 +01:00
|
|
|
{
|
|
|
|
int min_log2_tiles, delta_log2_tiles, n_tile_bits, n;
|
|
|
|
|
|
|
|
vp9_get_tile_n_bits(pc, &min_log2_tiles, &delta_log2_tiles);
|
|
|
|
n_tile_bits = pc->log2_tile_columns - min_log2_tiles;
|
|
|
|
for (n = 0; n < delta_log2_tiles; n++) {
|
|
|
|
if (n_tile_bits--) {
|
|
|
|
vp9_write_bit(&header_bc, 1);
|
|
|
|
} else {
|
|
|
|
vp9_write_bit(&header_bc, 0);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2013-02-08 20:33:11 +01:00
|
|
|
vp9_write_bit(&header_bc, pc->log2_tile_rows != 0);
|
|
|
|
if (pc->log2_tile_rows != 0)
|
|
|
|
vp9_write_bit(&header_bc, pc->log2_tile_rows != 1);
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
}
|
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
vp9_stop_encode(&header_bc);
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-10-18 01:47:38 +02:00
|
|
|
oh.first_partition_length_in_bytes = header_bc.pos;
|
2011-03-11 11:34:57 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/* update frame tag */
|
|
|
|
{
|
|
|
|
int v = (oh.first_partition_length_in_bytes << 5) |
|
|
|
|
(oh.show_frame << 4) |
|
|
|
|
(oh.version << 1) |
|
|
|
|
oh.type;
|
|
|
|
|
|
|
|
dest[0] = v;
|
|
|
|
dest[1] = v >> 8;
|
|
|
|
dest[2] = v >> 16;
|
|
|
|
}
|
2011-03-11 11:34:57 +01:00
|
|
|
|
2012-10-31 22:40:53 +01:00
|
|
|
*size = VP9_HEADER_SIZE + extra_bytes_packed + header_bc.pos;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-10-16 22:52:39 +02:00
|
|
|
if (pc->frame_type == KEY_FRAME) {
|
|
|
|
decide_kf_ymode_entropy(cpi);
|
|
|
|
} else {
|
2012-11-09 19:52:08 +01:00
|
|
|
/* This is not required if the counts in cpi are consistent with the
|
|
|
|
* final packing pass */
|
|
|
|
// if (!cpi->dummy_packing) vp9_zero(cpi->NMVcount);
|
2012-10-16 22:52:39 +02:00
|
|
|
}
|
|
|
|
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
{
|
2013-02-08 20:33:11 +01:00
|
|
|
int tile_row, tile_col, total_size = 0;
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
unsigned char *data_ptr = cx_data + header_bc.pos;
|
2013-02-08 20:33:11 +01:00
|
|
|
TOKENEXTRA *tok[1 << 6], *tok_end;
|
|
|
|
|
|
|
|
tok[0] = cpi->tok;
|
|
|
|
for (tile_col = 1; tile_col < pc->tile_columns; tile_col++)
|
|
|
|
tok[tile_col] = tok[tile_col - 1] + cpi->tok_count[tile_col - 1];
|
|
|
|
|
|
|
|
for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
|
|
|
|
vp9_get_tile_row_offsets(pc, tile_row);
|
|
|
|
tok_end = cpi->tok + cpi->tok_count[0];
|
|
|
|
for (tile_col = 0; tile_col < pc->tile_columns;
|
|
|
|
tile_col++, tok_end += cpi->tok_count[tile_col]) {
|
|
|
|
vp9_get_tile_col_offsets(pc, tile_col);
|
|
|
|
|
|
|
|
if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1)
|
|
|
|
vp9_start_encode(&residual_bc, data_ptr + total_size + 4);
|
|
|
|
else
|
|
|
|
vp9_start_encode(&residual_bc, data_ptr + total_size);
|
|
|
|
write_modes(cpi, &residual_bc, &tok[tile_col], tok_end);
|
|
|
|
vp9_stop_encode(&residual_bc);
|
|
|
|
if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) {
|
|
|
|
/* size of this tile */
|
|
|
|
data_ptr[total_size + 0] = residual_bc.pos;
|
|
|
|
data_ptr[total_size + 1] = residual_bc.pos >> 8;
|
|
|
|
data_ptr[total_size + 2] = residual_bc.pos >> 16;
|
|
|
|
data_ptr[total_size + 3] = residual_bc.pos >> 24;
|
|
|
|
total_size += 4;
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-02-08 20:33:11 +01:00
|
|
|
total_size += residual_bc.pos;
|
|
|
|
}
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
}
|
|
|
|
|
2013-02-08 20:33:11 +01:00
|
|
|
assert((unsigned int)(tok[0] - cpi->tok) == cpi->tok_count[0]);
|
|
|
|
for (tile_col = 1; tile_col < pc->tile_columns; tile_col++)
|
|
|
|
assert((unsigned int)(tok[tile_col] - tok[tile_col - 1]) ==
|
|
|
|
cpi->tok_count[tile_col]);
|
|
|
|
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independently encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Results: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
*size += total_size;
|
|
|
|
}
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef ENTROPY_STATS
|
2012-12-08 01:09:59 +01:00
|
|
|
static void print_tree_update_for_type(FILE *f,
|
|
|
|
vp9_coeff_stats *tree_update_hist,
|
|
|
|
int block_types, const char *header) {
|
2012-07-14 00:21:29 +02:00
|
|
|
int i, j, k, l;
|
2012-08-03 02:03:14 +02:00
|
|
|
|
2012-12-08 01:09:59 +01:00
|
|
|
fprintf(f, "const vp9_coeff_prob %s = {\n", header);
|
|
|
|
for (i = 0; i < block_types; i++) {
|
2012-07-14 00:21:29 +02:00
|
|
|
fprintf(f, " { \n");
|
|
|
|
for (j = 0; j < COEF_BANDS; j++) {
|
|
|
|
fprintf(f, " {\n");
|
|
|
|
for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
|
|
|
|
fprintf(f, " {");
|
|
|
|
for (l = 0; l < ENTROPY_NODES; l++) {
|
2012-12-08 01:09:59 +01:00
|
|
|
fprintf(f, "%3d, ",
|
|
|
|
get_binary_prob(tree_update_hist[i][j][k][l][0],
|
|
|
|
tree_update_hist[i][j][k][l][1]));
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
fprintf(f, "},\n");
|
|
|
|
}
|
|
|
|
fprintf(f, " },\n");
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
2012-07-14 00:21:29 +02:00
|
|
|
fprintf(f, " },\n");
|
|
|
|
}
|
|
|
|
fprintf(f, "};\n");
|
2012-12-08 01:09:59 +01:00
|
|
|
}
|
2012-05-08 21:38:39 +02:00
|
|
|
|
2012-12-08 01:09:59 +01:00
|
|
|
void print_tree_update_probs() {
|
|
|
|
FILE *f = fopen("coefupdprob.h", "w");
|
|
|
|
fprintf(f, "\n/* Update probabilities for token entropy tree. */\n\n");
|
2012-07-14 00:21:29 +02:00
|
|
|
|
2012-12-08 01:09:59 +01:00
|
|
|
print_tree_update_for_type(f, tree_update_hist_4x4, BLOCK_TYPES_4X4,
|
|
|
|
"vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]");
|
2013-02-15 21:09:05 +01:00
|
|
|
print_tree_update_for_type(f, hybrid_tree_update_hist_4x4,
|
|
|
|
BLOCK_TYPES_4X4_HYBRID,
|
|
|
|
"vp9_coef_update_probs_4x4"
|
|
|
|
"[BLOCK_TYPES_4X4_HYBRID]");
|
2012-12-08 01:09:59 +01:00
|
|
|
print_tree_update_for_type(f, tree_update_hist_8x8, BLOCK_TYPES_8X8,
|
|
|
|
"vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]");
|
2013-02-15 21:09:05 +01:00
|
|
|
print_tree_update_for_type(f, hybrid_tree_update_hist_8x8,
|
|
|
|
BLOCK_TYPES_8X8_HYBRID,
|
|
|
|
"vp9_coef_update_probs_8x8"
|
|
|
|
"[BLOCK_TYPES_8X8_HYBRID]");
|
2012-12-08 01:09:59 +01:00
|
|
|
print_tree_update_for_type(f, tree_update_hist_16x16, BLOCK_TYPES_16X16,
|
|
|
|
"vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]");
|
|
|
|
print_tree_update_for_type(f, hybrid_tree_update_hist_16x16,
|
2013-02-15 21:09:05 +01:00
|
|
|
BLOCK_TYPES_16X16_HYBRID,
|
|
|
|
"vp9_coef_update_probs_16x16"
|
|
|
|
"[BLOCK_TYPES_16X16_HYBRID]");
|
2012-12-08 01:09:59 +01:00
|
|
|
print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES_32X32,
|
|
|
|
"vp9_coef_update_probs_32x32[BLOCK_TYPES_32X32]");
|
2012-08-03 02:03:14 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
fclose(f);
|
|
|
|
f = fopen("treeupdate.bin", "wb");
|
2012-12-08 01:09:59 +01:00
|
|
|
fwrite(tree_update_hist_4x4, sizeof(tree_update_hist_4x4), 1, f);
|
2012-07-14 00:21:29 +02:00
|
|
|
fwrite(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f);
|
2012-08-03 02:03:14 +02:00
|
|
|
fwrite(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f);
|
2012-07-14 00:21:29 +02:00
|
|
|
fclose(f);
|
2010-05-18 17:58:33 +02:00
|
|
|
}
|
|
|
|
#endif
|