add a context tree structure to encoder
This patch sets up a quad_tree structure (pc_tree) for holding all of
pick_mode_context data we use at any square block size during encoding
or picking modes. That includes contexts for 2 horizontal and 2 vertical
splits, one none, and pointers to 4 sub pc_tree nodes corresponding
to split. It also includes a pointer to the current chosen partitioning.
This replaces code that held an index for every level in the pick
modes array including: sb_index, mb_index,
b_index, ab_index.
These were used as stateful indexes that pointed to the current pick mode
contexts you had at each level stored in the following arrays
array ab4x4_context[][][],
sb8x4_context[][][], sb4x8_context[][][], sb8x8_context[][][],
sb8x16_context[][][], sb16x8_context[][][], mb_context[][], sb32x16[][],
sb16x32[], sb32_context[], sb32x64_context[], sb64x32_context[],
sb64_context
and the partitioning that had been stored in the following:
b_partitioning, mb_partitioning, sb_partitioning, and sb64_partitioning.
Prior to this patch before doing an encode you had to set the appropriate
index for your block size ( switch statement), update it ( up to 3
lookups for the index array value) and then make your call into a recursive
function at which point you'd have to call get_context which then
had to do a switch statement based on the blocksize, and then up to 3
lookups based upon the block size to find the context to use.
With the new code the context for the block size is passed around directly
avoiding the extraneous switch statements and multi dimensional array
look ups that were listed above. At any level in the search all of the
contexts are local to the pc_tree you are working on (in?).
In addition in most places code that used to call sub functions and
then check if the block size was 4x4 and index was > 0 and return
now don't preferring instead to call the right none function on the inside.
Change-Id: I06e39318269d9af2ce37961b3f95e181b57f5ed9
2014-04-17 16:30:55 +02:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
|
|
|
*
|
|
|
|
* Use of this source code is governed by a BSD-style license
|
|
|
|
* that can be found in the LICENSE file in the root of the source
|
|
|
|
* tree. An additional intellectual property rights grant can be found
|
|
|
|
* in the file PATENTS. All contributing project authors may
|
|
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef VP9_ENCODER_VP9_CONTEXT_TREE_H_
|
|
|
|
#define VP9_ENCODER_VP9_CONTEXT_TREE_H_
|
|
|
|
|
2014-10-07 21:13:33 +02:00
|
|
|
#include "vp9/common/vp9_blockd.h"
|
2015-06-29 18:27:11 +02:00
|
|
|
#include "vp9/encoder/vp9_block.h"
|
add a context tree structure to encoder
This patch sets up a quad_tree structure (pc_tree) for holding all of
pick_mode_context data we use at any square block size during encoding
or picking modes. That includes contexts for 2 horizontal and 2 vertical
splits, one none, and pointers to 4 sub pc_tree nodes corresponding
to split. It also includes a pointer to the current chosen partitioning.
This replaces code that held an index for every level in the pick
modes array including: sb_index, mb_index,
b_index, ab_index.
These were used as stateful indexes that pointed to the current pick mode
contexts you had at each level stored in the following arrays
array ab4x4_context[][][],
sb8x4_context[][][], sb4x8_context[][][], sb8x8_context[][][],
sb8x16_context[][][], sb16x8_context[][][], mb_context[][], sb32x16[][],
sb16x32[], sb32_context[], sb32x64_context[], sb64x32_context[],
sb64_context
and the partitioning that had been stored in the following:
b_partitioning, mb_partitioning, sb_partitioning, and sb64_partitioning.
Prior to this patch before doing an encode you had to set the appropriate
index for your block size ( switch statement), update it ( up to 3
lookups for the index array value) and then make your call into a recursive
function at which point you'd have to call get_context which then
had to do a switch statement based on the blocksize, and then up to 3
lookups based upon the block size to find the context to use.
With the new code the context for the block size is passed around directly
avoiding the extraneous switch statements and multi dimensional array
look ups that were listed above. At any level in the search all of the
contexts are local to the pc_tree you are working on (in?).
In addition in most places code that used to call sub functions and
then check if the block size was 4x4 and index was > 0 and return
now don't preferring instead to call the right none function on the inside.
Change-Id: I06e39318269d9af2ce37961b3f95e181b57f5ed9
2014-04-17 16:30:55 +02:00
|
|
|
|
2015-09-10 06:04:27 +02:00
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
2014-05-15 21:01:34 +02:00
|
|
|
// Forward declarations: only pointers to these types are needed here, so the
// headers that define them do not have to be included.
// NOTE(review): VP9Common and ThreadData are used by the prototypes at the end
// of this header; VP9_COMP is not referenced anywhere in the visible part of
// the file. Confirm whether it is still needed.
struct VP9_COMP;
|
2014-10-07 21:13:33 +02:00
|
|
|
struct VP9Common;
|
2014-11-21 20:11:06 +01:00
|
|
|
struct ThreadData;
|
2014-05-15 21:01:34 +02:00
|
|
|
|
|
|
|
// Structure to hold snapshot of coding context during the mode picking process
// PC_TREE (declared further down in this header) embeds these by value: one for
// the unsplit block and two each for the horizontal and vertical splits.
|
|
|
|
typedef struct {
|
|
|
|
MODE_INFO mic;
|
2015-06-29 18:27:11 +02:00
|
|
|
MB_MODE_INFO_EXT mbmi_ext;
|
2014-05-15 21:01:34 +02:00
|
|
|
uint8_t *zcoeff_blk;
|
2014-09-03 01:34:09 +02:00
|
|
|
// Per-plane tables of buffer pointers; each plane has three slots. Nothing in
// this header allocates the buffers the pointers refer to.
tran_low_t *coeff[MAX_MB_PLANE][3];
|
|
|
|
tran_low_t *qcoeff[MAX_MB_PLANE][3];
|
|
|
|
tran_low_t *dqcoeff[MAX_MB_PLANE][3];
|
2014-05-15 21:01:34 +02:00
|
|
|
uint16_t *eobs[MAX_MB_PLANE][3];
|
|
|
|
|
|
|
|
// dual buffer pointers, 0: in use, 1: best in store
// NOTE(review): the comment above names two roles (0 and 1) but every table is
// declared with three slots per plane; confirm how index 2 is used.
|
2014-09-03 01:34:09 +02:00
|
|
|
tran_low_t *coeff_pbuf[MAX_MB_PLANE][3];
|
|
|
|
tran_low_t *qcoeff_pbuf[MAX_MB_PLANE][3];
|
|
|
|
tran_low_t *dqcoeff_pbuf[MAX_MB_PLANE][3];
|
2014-05-15 21:01:34 +02:00
|
|
|
uint16_t *eobs_pbuf[MAX_MB_PLANE][3];
|
|
|
|
|
|
|
|
int is_coded;
|
|
|
|
int num_4x4_blk;
|
|
|
|
int skip;
|
Hybrid partition search for rtc coding mode
This commit re-designs the recursive partition search scheme in
rtc speed -5. It first checks if the current block is under cyclic
refresh mode. If so, apply recursive partition search. Otherwise,
perform sub-sampled pixel based partition selection. When the
pre-selection finds the partition size should be 32x32 or above,
use the partition size directly. Otherwise, apply partition search
at nearby levels around the preset partition size.
It is enabled in speed -5. The compression performance of rtc
speed -5 is improved by 9.4%. Speed wise, the run-time goes slower
from 1% to 10%.
nik_720p, 1000 kbps
33220 b/f, 38.977 dB, 10109 ms -> 33200 b/f, 39.119 dB, 10210 ms
vidyo1_720p, 1000 kbps
16536 b/f, 40.495 dB, 10119 ms -> 16536 b/f, 40.827 dB, 11287 ms
Change-Id: I65adba352e3adc03bae50854ddaea1b421653c6c
2014-10-17 17:58:28 +02:00
|
|
|
int pred_pixel_ready;
|
Early termination in encoding partition search
In the partition search, the encoder checks all possible
partitionings in the superblock's partition search tree.
This patch proposed a set of criteria for partition search
early termination, which effectively decided whether or
not to terminate the search in current branch based on the
"skippable" result of the quantized transform coefficients.
The "skippable" information was gathered during the
partition mode search, and no overhead calculations were
introduced.
This patch gives significant encoding speed gains without
sacrificing the quality.
Borg test results:
1. At speed 1,
stdhd set: psnr: +0.074%, ssim: +0.093%;
derf set: psnr: -0.024%, ssim: +0.011%;
2. At speed 2,
stdhd set: psnr: +0.033%, ssim: +0.100%;
derf set: psnr: -0.062%, ssim: +0.003%;
3. At speed 3,
stdhd set: psnr: +0.060%, ssim: +0.190%;
derf set: psnr: -0.064%, ssim: -0.002%;
4. At speed 4,
stdhd set: psnr: +0.070%, ssim: +0.143%;
derf set: psnr: -0.104%, ssim: +0.039%;
The speedup ranges from several percent to 60+%.
speed1 speed2 speed3 speed4
(1080p, 100f):
old_town_cross: 48.2% 23.9% 20.8% 16.5%
park_joy: 11.4% 17.8% 29.4% 18.2%
pedestrian_area: 10.7% 4.0% 4.2% 2.4%
(720p, 200f):
mobcal: 68.1% 36.3% 34.4% 17.7%
parkrun: 15.8% 24.2% 37.1% 16.8%
shields: 45.1% 32.8% 30.1% 9.6%
(cif, 300f)
bus: 3.7% 10.4% 14.0% 7.9%
deadline: 13.6% 14.8% 12.6% 10.9%
mobile: 5.3% 11.5% 14.7% 10.7%
Change-Id: I246c38fb952ad762ce5e365711235b605f470a66
2014-08-15 02:25:21 +02:00
|
|
|
// For current partition, only if all Y, U, and V transform blocks'
|
|
|
|
// coefficients are quantized to 0, skippable is set to 0.
// NOTE(review): a flag with this name would be expected to be set to 1 (not 0)
// when every coefficient quantizes to zero; confirm against the code that
// writes this field and correct the sentence above if it is inverted.
|
|
|
|
int skippable;
|
2014-09-03 02:32:12 +02:00
|
|
|
// MAX_MB_PLANE << 2 entries, i.e. four per plane.
uint8_t skip_txfm[MAX_MB_PLANE << 2];
|
2014-05-15 21:01:34 +02:00
|
|
|
int best_mode_index;
|
|
|
|
int hybrid_pred_diff;
|
|
|
|
int comp_pred_diff;
|
|
|
|
int single_pred_diff;
|
|
|
|
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
|
|
|
|
|
2014-11-04 23:32:04 +01:00
|
|
|
// TODO(jingning) Use RD_COST struct here instead. This involves a broader
|
|
|
|
// scope of refactoring.
|
|
|
|
int rate;
|
|
|
|
int64_t dist;
|
|
|
|
|
2014-07-24 22:30:00 +02:00
|
|
|
// The members up to the matching #endif exist only when
// CONFIG_VP9_TEMPORAL_DENOISING is non-zero, so the layout of this struct
// depends on that build option.
#if CONFIG_VP9_TEMPORAL_DENOISING
|
2014-07-08 17:36:25 +02:00
|
|
|
unsigned int newmv_sse;
|
|
|
|
unsigned int zeromv_sse;
|
2015-11-19 02:04:15 +01:00
|
|
|
unsigned int zeromv_lastref_sse;
|
2014-07-08 17:36:25 +02:00
|
|
|
PREDICTION_MODE best_sse_inter_mode;
|
|
|
|
int_mv best_sse_mv;
|
|
|
|
MV_REFERENCE_FRAME best_reference_frame;
|
|
|
|
MV_REFERENCE_FRAME best_zeromv_reference_frame;
|
|
|
|
#endif
|
|
|
|
|
2014-05-15 21:01:34 +02:00
|
|
|
// motion vector cache for adaptive motion search control in partition
|
|
|
|
// search loop
// The array length is MAX_REF_FRAMES.
|
2014-05-19 20:43:07 +02:00
|
|
|
MV pred_mv[MAX_REF_FRAMES];
|
2014-05-15 21:01:34 +02:00
|
|
|
INTERP_FILTER pred_interp_filter;
|
2017-02-27 23:26:15 +01:00
|
|
|
|
|
|
|
// Used for the machine learning-based early termination
|
2017-03-16 23:45:07 +01:00
|
|
|
int32_t sum_y_eobs;
|
2014-05-15 21:01:34 +02:00
|
|
|
} PICK_MODE_CONTEXT;
|
|
|
|
|
|
|
|
// One node of the quad-tree of pick-mode contexts. Each node carries the
// contexts for one square block: one for the unsplit block, two for the
// horizontal split, two for the vertical split, and four links for the split
// into sub-blocks.
typedef struct PC_TREE {
|
|
|
|
// NOTE(review): the meaning of index is not visible in this header; confirm
// against the code that builds the tree.
int index;
|
|
|
|
// Partition type currently chosen for this node (stored by value).
PARTITION_TYPE partitioning;
|
|
|
|
// Presumably the block size this node covers; confirm against the code that
// builds the tree.
BLOCK_SIZE block_size;
|
|
|
|
// Context for coding the block without any split.
PICK_MODE_CONTEXT none;
|
|
|
|
// Contexts for the two halves of a horizontal split.
PICK_MODE_CONTEXT horizontal[2];
|
|
|
|
// Contexts for the two halves of a vertical split.
PICK_MODE_CONTEXT vertical[2];
|
|
|
|
// The two arrays below share storage, so a node uses only one of them at a
// time. The union is unnamed: its members are accessed directly on PC_TREE,
// which needs C11 anonymous-union support or a compiler extension.
union {
|
|
|
|
// Links to the four child nodes.
struct PC_TREE *split[4];
|
|
|
|
// Links to four contexts instead of child nodes.
// NOTE(review): presumably used at the smallest block size, where there are
// no further PC_TREE children; confirm against the tree setup code.
PICK_MODE_CONTEXT *leaf_split[4];
|
|
|
|
};
|
|
|
|
} PC_TREE;
|
|
|
|
|
2014-11-21 20:11:06 +01:00
|
|
|
// Declared here, defined elsewhere. Judging by the name and parameters this
// builds the PC_TREE quad-tree for the given thread data; confirm ownership and
// allocation details against the definition.
void vp9_setup_pc_tree(struct VP9Common *cm, struct ThreadData *td);
|
|
|
|
// Declared here, defined elsewhere. Presumably releases whatever
// vp9_setup_pc_tree attached to td; confirm against the definition.
void vp9_free_pc_tree(struct ThreadData *td);
|
add a context tree structure to encoder
This patch sets up a quad_tree structure (pc_tree) for holding all of
pick_mode_context data we use at any square block size during encoding
or picking modes. That includes contexts for 2 horizontal and 2 vertical
splits, one none, and pointers to 4 sub pc_tree nodes corresponding
to split. It also includes a pointer to the current chosen partitioning.
This replaces code that held an index for every level in the pick
modes array including: sb_index, mb_index,
b_index, ab_index.
These were used as stateful indexes that pointed to the current pick mode
contexts you had at each level stored in the following arrays
array ab4x4_context[][][],
sb8x4_context[][][], sb4x8_context[][][], sb8x8_context[][][],
sb8x16_context[][][], sb16x8_context[][][], mb_context[][], sb32x16[][],
sb16x32[], sb32_context[], sb32x64_context[], sb64x32_context[],
sb64_context
and the partitioning that had been stored in the following:
b_partitioning, mb_partitioning, sb_partitioning, and sb64_partitioning.
Prior to this patch before doing an encode you had to set the appropriate
index for your block size ( switch statement), update it ( up to 3
lookups for the index array value) and then make your call into a recursive
function at which point you'd have to call get_context which then
had to do a switch statement based on the blocksize, and then up to 3
lookups based upon the block size to find the context to use.
With the new code the context for the block size is passed around directly
avoiding the extraneous switch statements and multi dimensional array
look ups that were listed above. At any level in the search all of the
contexts are local to the pc_tree you are working on (in?).
In addition in most places code that used to call sub functions and
then check if the block size was 4x4 and index was > 0 and return
now don't preferring instead to call the right none function on the inside.
Change-Id: I06e39318269d9af2ce37961b3f95e181b57f5ed9
2014-04-17 16:30:55 +02:00
|
|
|
|
2015-09-10 06:04:27 +02:00
|
|
|
#ifdef __cplusplus
|
|
|
|
} // extern "C"
|
|
|
|
#endif
|
|
|
|
|
add a context tree structure to encoder
This patch sets up a quad_tree structure (pc_tree) for holding all of
pick_mode_context data we use at any square block size during encoding
or picking modes. That includes contexts for 2 horizontal and 2 vertical
splits, one none, and pointers to 4 sub pc_tree nodes corresponding
to split. It also includes a pointer to the current chosen partitioning.
This replaces code that held an index for every level in the pick
modes array including: sb_index, mb_index,
b_index, ab_index.
These were used as stateful indexes that pointed to the current pick mode
contexts you had at each level stored in the following arrays
array ab4x4_context[][][],
sb8x4_context[][][], sb4x8_context[][][], sb8x8_context[][][],
sb8x16_context[][][], sb16x8_context[][][], mb_context[][], sb32x16[][],
sb16x32[], sb32_context[], sb32x64_context[], sb64x32_context[],
sb64_context
and the partitioning that had been stored in the following:
b_partitioning, mb_partitioning, sb_partitioning, and sb64_partitioning.
Prior to this patch before doing an encode you had to set the appropriate
index for your block size ( switch statement), update it ( up to 3
lookups for the index array value) and then make your call into a recursive
function at which point you'd have to call get_context which then
had to do a switch statement based on the blocksize, and then up to 3
lookups based upon the block size to find the context to use.
With the new code the context for the block size is passed around directly
avoiding the extraneous switch statements and multi dimensional array
look ups that were listed above. At any level in the search all of the
contexts are local to the pc_tree you are working on (in?).
In addition in most places code that used to call sub functions and
then check if the block size was 4x4 and index was > 0 and return
now don't preferring instead to call the right none function on the inside.
Change-Id: I06e39318269d9af2ce37961b3f95e181b57f5ed9
2014-04-17 16:30:55 +02:00
|
|
|
#endif /* VP9_ENCODER_VP9_CONTEXT_TREE_H_ */
|