Merge "Creates a new speed 1:" into experimental
This commit is contained in:
commit
21595f8e38
@ -208,7 +208,6 @@ specialize vp9_short_iht16x16_add
|
||||
|
||||
prototype void vp9_idct4_1d "int16_t *input, int16_t *output"
|
||||
specialize vp9_idct4_1d sse2
|
||||
|
||||
# dct and add
|
||||
|
||||
prototype void vp9_dc_only_idct_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride"
|
||||
@ -265,6 +264,10 @@ specialize vp9_variance8x16 mmx sse2
|
||||
prototype unsigned int vp9_variance8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
|
||||
specialize vp9_variance8x8 mmx sse2
|
||||
|
||||
prototype void vp9_get_sse_sum_8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"
|
||||
specialize vp9_get_sse_sum_8x8 sse2
|
||||
vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2
|
||||
|
||||
prototype unsigned int vp9_variance8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
|
||||
specialize vp9_variance8x4 sse2
|
||||
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/encoder/vp9_encodeframe.h"
|
||||
#include "vp9/encoder/vp9_encodemb.h"
|
||||
#include "vp9/encoder/vp9_encodemv.h"
|
||||
@ -97,6 +98,8 @@ static unsigned int alt_activity_measure(VP9_COMP *cpi,
|
||||
return vp9_encode_intra(cpi, x, use_dc_pred);
|
||||
}
|
||||
|
||||
// All-zero block of 64*64 bytes. choose_partitioning() points its reference
// buffer at this array (with a stride of 64) when measuring source variance.
DECLARE_ALIGNED(16, static const uint8_t, vp9_64x64_zeros[64*64]) = { 0 };
|
||||
|
||||
|
||||
// Measure the activity of the current macroblock
|
||||
// What we measure here is TBD so abstracted to this function
|
||||
@ -769,6 +772,35 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
|
||||
vpx_memcpy(cm->left_seg_context + (mi_row & MI_MASK), sl,
|
||||
sizeof(PARTITION_CONTEXT) * mh);
|
||||
}
|
||||
static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
|
||||
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
|
||||
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
|
||||
PARTITION_CONTEXT sa[8],
|
||||
PARTITION_CONTEXT sl[8],
|
||||
BLOCK_SIZE_TYPE bsize) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
MACROBLOCK *const x = &cpi->mb;
|
||||
MACROBLOCKD *const xd = &x->e_mbd;
|
||||
int p;
|
||||
int bwl = b_width_log2(bsize), bw = 1 << bwl;
|
||||
int bhl = b_height_log2(bsize), bh = 1 << bhl;
|
||||
int mwl = mi_width_log2(bsize), mw = 1 << mwl;
|
||||
int mhl = mi_height_log2(bsize), mh = 1 << mhl;
|
||||
|
||||
// buffer the above/left context information of the block in search.
|
||||
for (p = 0; p < MAX_MB_PLANE; ++p) {
|
||||
vpx_memcpy(a + bw * p, cm->above_context[p] +
|
||||
(mi_col * 2 >> xd->plane[p].subsampling_x),
|
||||
sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[p].subsampling_x);
|
||||
vpx_memcpy(l + bh * p, cm->left_context[p] +
|
||||
((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
|
||||
sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[p].subsampling_y);
|
||||
}
|
||||
vpx_memcpy(sa, cm->above_seg_context + mi_col,
|
||||
sizeof(PARTITION_CONTEXT) * mw);
|
||||
vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK),
|
||||
sizeof(PARTITION_CONTEXT) * mh);
|
||||
}
|
||||
|
||||
static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp,
|
||||
int mi_row, int mi_col, int output_enabled,
|
||||
@ -861,6 +893,337 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
|
||||
}
|
||||
}
|
||||
|
||||
static void set_partitioning(VP9_COMP *cpi, MODE_INFO *m,
|
||||
BLOCK_SIZE_TYPE bsize) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
const int mis = cm->mode_info_stride;
|
||||
int bsl = b_width_log2(bsize);
|
||||
int bs = (1 << bsl) / 2; //
|
||||
int block_row, block_col;
|
||||
int row, col;
|
||||
|
||||
// this test function sets the entire macroblock to the same bsize
|
||||
for (block_row = 0; block_row < 8; block_row += bs) {
|
||||
for (block_col = 0; block_col < 8; block_col += bs) {
|
||||
for (row = 0; row < bs; row++) {
|
||||
for (col = 0; col < bs; col++) {
|
||||
m[(block_row+row)*mis + block_col+col].mbmi.sb_type = bsize;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void set_block_size(VP9_COMMON *const cm,
|
||||
MODE_INFO *m, BLOCK_SIZE_TYPE bsize, int mis,
|
||||
int mi_row, int mi_col) {
|
||||
int row, col;
|
||||
int bsl = b_width_log2(bsize);
|
||||
int bs = (1 << bsl) / 2; //
|
||||
MODE_INFO *m2 = m + mi_row * mis + mi_col;
|
||||
for (row = 0; row < bs; row++) {
|
||||
for (col = 0; col < bs; col++) {
|
||||
if (mi_row + row >= cm->mi_rows || mi_col + col >= cm->mi_cols)
|
||||
return;
|
||||
m2[row*mis+col].mbmi.sb_type = bsize;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Error statistics for one node of the variance tree. All four fields are
// written together by fill_variance().
typedef struct {
  int64_t sum_square_error;  // sum of squared errors over the node
  int64_t sum_error;         // sum of errors over the node
  int count;                 // sample count the sums were taken over
  int variance;              // 256 * (sum_square_error - sum_error^2 / count)
                             //     / count
} var;
|
||||
|
||||
#define VT(TYPE, BLOCKSIZE) \
|
||||
typedef struct { \
|
||||
var none; \
|
||||
var horz[2]; \
|
||||
var vert[2]; \
|
||||
BLOCKSIZE split[4]; } TYPE;
|
||||
|
||||
VT(v8x8, var)
|
||||
VT(v16x16, v8x8)
|
||||
VT(v32x32, v16x16)
|
||||
VT(v64x64, v32x32)
|
||||
|
||||
// Identifiers named after the v16x16 / v32x32 / v64x64 variance-tree node
// types. Not referenced by the code visible in this file section.
typedef enum {
  V16X16 = 0,
  V32X32 = 1,
  V64X64 = 2,
} TREE_LEVEL;
|
||||
|
||||
// Set variance values given sum square error, sum error, count.
|
||||
static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
|
||||
v->sum_square_error = s2;
|
||||
v->sum_error = s;
|
||||
v->count = c;
|
||||
v->variance = 256
|
||||
* (v->sum_square_error - v->sum_error * v->sum_error / v->count)
|
||||
/ v->count;
|
||||
}
|
||||
|
||||
// Fills a 16x16 variance tree node by calling get var8x8 var..
|
||||
static void fill_16x16_variance(const unsigned char *s, int sp,
|
||||
const unsigned char *d, int dp, v16x16 *vt) {
|
||||
unsigned int sse;
|
||||
int sum;
|
||||
vp9_get_sse_sum_8x8(s, sp, d, dp, &sse, &sum);
|
||||
fill_variance(&vt->split[0].none, sse, sum, 64);
|
||||
vp9_get_sse_sum_8x8(s + 8, sp, d + 8, dp, &sse, &sum);
|
||||
fill_variance(&vt->split[1].none, sse, sum, 64);
|
||||
vp9_get_sse_sum_8x8(s + 8 * sp, sp, d + 8 * dp, dp, &sse, &sum);
|
||||
fill_variance(&vt->split[2].none, sse, sum, 64);
|
||||
vp9_get_sse_sum_8x8(s + 8 * sp + 8, sp, d + 8 + 8 * dp, dp, &sse, &sum);
|
||||
fill_variance(&vt->split[3].none, sse, sum, 64);
|
||||
}
|
||||
|
||||
// Combine 2 variance structures by summing the sum_error, sum_square_error,
|
||||
// and counts and then calculating the new variance.
|
||||
void sum_2_variances(var *r, var *a, var*b) {
|
||||
fill_variance(r, a->sum_square_error + b->sum_square_error,
|
||||
a->sum_error + b->sum_error, a->count + b->count);
|
||||
}
|
||||
// Fill one level of the variance tree: sum the four split[].none records into
// the two horizontal halves, the two vertical halves, and finally the `none`
// record of the node itself.
//
// NODE must be written as the address of a node lvalue, e.g. `&vt.split[i]`.
// The macro appends member accesses directly to that text, so `NODE.horz[0]`
// expands to `&vt.split[i].horz[0]`, i.e. the address of the member. For that
// reason NODE is deliberately not parenthesized in the expansion.
//
// The statements are wrapped in a brace block so the macro expands to a
// single statement; it may be invoked with or without a trailing semicolon.
#define fill_variance_tree(NODE) \
  { \
    sum_2_variances(NODE.horz[0], NODE.split[0].none, NODE.split[1].none); \
    sum_2_variances(NODE.horz[1], NODE.split[2].none, NODE.split[3].none); \
    sum_2_variances(NODE.vert[0], NODE.split[0].none, NODE.split[2].none); \
    sum_2_variances(NODE.vert[1], NODE.split[1].none, NODE.split[3].none); \
    sum_2_variances(NODE.none, NODE.vert[0], NODE.vert[1]); \
  }
|
||||
|
||||
// Set the block size in the mode-info array if the variance of NODE is below
// the threshold for one of: the whole node (BLOCKSIZE), both horizontal
// halves (PARTITION_HORZ subsize) or both vertical halves (PARTITION_VERT
// subsize). The first test that passes wins; ACTION is then executed.
//
//   NODE      - variance tree node, written as an lvalue (e.g. vt.split[i])
//   BLOCKSIZE - block size corresponding to NODE
//   R, C      - mode-info row / column passed to set_block_size()
//   ACTION    - statement run after a size was set (e.g. `return`, `continue`)
//
// The expansion uses the caller's local variables `threshold`, `cm`, `m` and
// `mis`. It is a plain brace block rather than do { } while (0) on purpose:
// inside a do/while, an ACTION of `continue` would bind to that loop instead
// of the caller's enclosing loop.
#define set_vt_size(NODE, BLOCKSIZE, R, C, ACTION) \
  { \
    if (NODE.none.variance < threshold) { \
      set_block_size(cm, m, BLOCKSIZE, mis, R, C); \
      ACTION; \
    } else if (NODE.horz[0].variance < threshold && \
               NODE.horz[1].variance < threshold) { \
      set_block_size(cm, m, get_subsize(BLOCKSIZE, PARTITION_HORZ), mis, R, C); \
      ACTION; \
    } else if (NODE.vert[0].variance < threshold && \
               NODE.vert[1].variance < threshold) { \
      set_block_size(cm, m, get_subsize(BLOCKSIZE, PARTITION_VERT), mis, R, C); \
      ACTION; \
    } \
  }
|
||||
|
||||
// Choose a partitioning for the 64x64 superblock at (mi_row, mi_col) from
// the variance of its source pixels and record it in the sb_type fields of
// the mode-info array.
//   cpi            - encoder instance
//   m              - mode-info entry that (mi_row, mi_col) is relative to
//   mi_row, mi_col - mode-info position of the superblock
//
// A tree of 8x8 statistics is built and summed upwards. The tree is then
// walked from the root: a node whose variance (whole, both horizontal halves
// or both vertical halves) is below the threshold gets that size; otherwise
// its four children are examined, down to 8x8.
static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
                                int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  const int mis = cm->mode_info_stride;
  // TODO(JBB): More experimentation or testing of this threshold;
  int64_t threshold = 4;
  int i, j, k;
  v64x64 vt;
  const unsigned char *s;
  int sp;
  const unsigned char *d;
  int dp;

  set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64);
  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;

  // TODO(JBB): Clearly the higher the quantizer the fewer partitions we want
  // but this needs more experimentation.
  threshold = threshold * cm->base_qindex * cm->base_qindex;

  // The variance is always measured against an all-zero reference block.
  // NOTE(review): the original carried a commented-out
  // `if (cm->frame_type == KEY_FRAME)` around this assignment, with the
  // plane-0 `pre` buffer as the alternative reference - confirm the intent.
  d = vp9_64x64_zeros;
  dp = 64;

  // Fill in the entire tree of 8x8 variances for splits.
  for (i = 0; i < 4; i++) {
    const int x32_idx = ((i & 1) << 5);
    const int y32_idx = ((i >> 1) << 5);
    for (j = 0; j < 4; j++) {
      const int x_idx = x32_idx + ((j & 1) << 4);
      const int y_idx = y32_idx + ((j >> 1) << 4);
      fill_16x16_variance(s + y_idx * sp + x_idx, sp, d + y_idx * dp + x_idx,
                          dp, &vt.split[i].split[j]);
    }
  }

  // Fill the rest of the variance tree by summing the split partition
  // values.
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      fill_variance_tree(&vt.split[i].split[j]);
    }
    fill_variance_tree(&vt.split[i]);
  }
  fill_variance_tree(&vt);

  // Now go through the entire structure, splitting every blocksize until
  // we get to one that's got a variance lower than our threshold, or we
  // hit 8x8.
  set_vt_size(vt, BLOCK_SIZE_SB64X64, mi_row, mi_col, return);
  for (i = 0; i < 4; ++i) {
    const int x32_idx = ((i & 1) << 2);
    const int y32_idx = ((i >> 1) << 2);
    // Test the i-th 32x32 child, not the 64x64 root: the root has already
    // failed every test above, so re-testing it could never succeed.
    set_vt_size(vt.split[i], BLOCK_SIZE_SB32X32, mi_row + y32_idx,
                mi_col + x32_idx, continue);

    for (j = 0; j < 4; ++j) {
      const int x16_idx = ((j & 1) << 1);
      const int y16_idx = ((j >> 1) << 1);
      // Likewise test the j-th 16x16 child of the current 32x32 node.
      set_vt_size(vt.split[i].split[j], BLOCK_SIZE_MB16X16,
                  mi_row + y32_idx + y16_idx, mi_col + x32_idx + x16_idx,
                  continue);

      // Nothing at 16x16 qualified: fall back to four 8x8 blocks.
      for (k = 0; k < 4; ++k) {
        const int x8_idx = (k & 1);
        const int y8_idx = (k >> 1);
        set_block_size(cm, m, BLOCK_SIZE_SB8X8, mis,
                       mi_row + y32_idx + y16_idx + y8_idx,
                       mi_col + x32_idx + x16_idx + x8_idx);
      }
    }
  }
}
|
||||
// Run mode selection for the block at (mi_row, mi_col) using the partitioning
// already stored in the mode-info array (sb_type of m), instead of searching
// over partition types. Recurses into the four quadrants for PARTITION_SPLIT.
//   cpi            - encoder instance
//   m              - mode-info entry of the block's top-left position
//   tp             - token output cursor, forwarded to the callees
//   mi_row, mi_col - mode-info position of the block
//   bsize          - size of the block at this recursion level
//   rate, dist     - receive the accumulated rate and distortion; both are
//                    set to 0 when the block lies outside the frame
static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
                             int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize,
                             int *rate, int *dist) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  MACROBLOCKD *xd = &cpi->mb.e_mbd;
  const int mis = cm->mode_info_stride;
  int bwl, bhl;
  int bsl = b_width_log2(bsize);
  int bs = (1 << bsl);
  int bss = (1 << bsl) / 4;
  int i, pl;
  PARTITION_TYPE partition;
  BLOCK_SIZE_TYPE subsize;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  int r = 0, d = 0;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
    // Nothing to encode; give the outputs a defined value so a caller that
    // accumulates them never reads indeterminate memory.
    *rate = 0;
    *dist = 0;
    return;
  }

  bwl = b_width_log2(m->mbmi.sb_type);
  bhl = b_height_log2(m->mbmi.sb_type);

  // parse the partition type
  if ((bwl == bsl) && (bhl == bsl)) {
    partition = PARTITION_NONE;
  } else if ((bwl == bsl) && (bhl < bsl)) {
    partition = PARTITION_HORZ;
  } else if ((bwl < bsl) && (bhl == bsl)) {
    partition = PARTITION_VERT;
  } else if ((bwl < bsl) && (bhl < bsl)) {
    partition = PARTITION_SPLIT;
  } else {
    // The stored block size is larger than bsize in some dimension. Trap in
    // debug builds; when assert() is compiled out (NDEBUG), code the block
    // unsplit rather than use an indeterminate partition value.
    assert(0);
    partition = PARTITION_NONE;
  }

  subsize = get_subsize(bsize, partition);

  // TODO(JBB): this restriction is here because pick_sb_modes can return
  // r's that are INT_MAX meaning we can't select a mode / mv for this block.
  // when the code is made to work for less than sb8x8 we need to come up with
  // a solution to this problem.
  assert(subsize >= BLOCK_SIZE_SB8X8);

  if (bsize >= BLOCK_SIZE_SB8X8) {
    xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
    xd->above_seg_context = cm->above_seg_context + mi_col;
    *(get_sb_partitioning(x, bsize)) = subsize;
  }

  pl = partition_plane_context(xd, bsize);
  // Saved here and restored after the trial below, before encode_sb().
  save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
  switch (partition) {
    case PARTITION_NONE:
      pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
                    get_block_context(x, bsize));
      r += x->partition_cost[pl][PARTITION_NONE];
      break;
    case PARTITION_HORZ:
      *(get_sb_index(xd, subsize)) = 0;
      pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
                    get_block_context(x, subsize));
      if (mi_row + (bs >> 1) <= cm->mi_rows) {
        int rt, dt;
        // Commit the first half before picking modes for the second half.
        update_state(cpi, get_block_context(x, subsize), subsize, 0);
        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
        *(get_sb_index(xd, subsize)) = 1;
        pick_sb_modes(cpi, mi_row + (bs >> 2), mi_col, tp, &rt, &dt, subsize,
                      get_block_context(x, subsize));
        r += rt;
        d += dt;
      }
      set_partition_seg_context(cm, xd, mi_row, mi_col);
      pl = partition_plane_context(xd, bsize);
      r += x->partition_cost[pl][PARTITION_HORZ];
      break;
    case PARTITION_VERT:
      *(get_sb_index(xd, subsize)) = 0;
      pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
                    get_block_context(x, subsize));
      if (mi_col + (bs >> 1) <= cm->mi_cols) {
        int rt, dt;
        // Commit the first half before picking modes for the second half.
        update_state(cpi, get_block_context(x, subsize), subsize, 0);
        encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
        *(get_sb_index(xd, subsize)) = 1;
        pick_sb_modes(cpi, mi_row, mi_col + (bs >> 2), tp, &rt, &dt, subsize,
                      get_block_context(x, subsize));
        r += rt;
        d += dt;
      }
      set_partition_seg_context(cm, xd, mi_row, mi_col);
      pl = partition_plane_context(xd, bsize);
      r += x->partition_cost[pl][PARTITION_VERT];
      // NOTE(review): only this case restores the context here; the other
      // cases rely on the restore after the switch. Kept as in the original -
      // confirm whether it is needed.
      restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
      break;
    case PARTITION_SPLIT:
      for (i = 0; i < 4; i++) {
        int x_idx = (i & 1) * (bs >> 2);
        int y_idx = (i >> 1) * (bs >> 2);
        int jj = i >> 1, ii = i & 0x01;
        int rt, dt;

        // Skip quadrants that start outside the frame.
        if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
          continue;

        *(get_sb_index(xd, subsize)) = i;

        rd_use_partition(cpi, m + jj * bss * mis + ii * bss, tp, mi_row + y_idx,
                         mi_col + x_idx, subsize, &rt, &dt);
        r += rt;
        d += dt;
      }
      set_partition_seg_context(cm, xd, mi_row, mi_col);
      pl = partition_plane_context(xd, bsize);
      r += x->partition_cost[pl][PARTITION_SPLIT];
      break;
    default:
      assert(0);
  }

  // update partition context
#if CONFIG_AB4X4
  if (bsize >= BLOCK_SIZE_SB8X8 &&
      (bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) {
#else
  if (bsize > BLOCK_SIZE_SB8X8
      && (bsize == BLOCK_SIZE_MB16X16 || partition != PARTITION_SPLIT)) {
#endif
    set_partition_seg_context(cm, xd, mi_row, mi_col);
    update_partition_context(xd, subsize, bsize);
  }
  restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);

  // Only encode when a usable mode was found (neither total hit INT_MAX).
  // Output is enabled only at the 64x64 level.
  if (r < INT_MAX && d < INT_MAX)
    encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize);
  *rate = r;
  *dist = d;
}
|
||||
|
||||
|
||||
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
|
||||
// unlikely to be selected depending on previously rate-distortion optimization
|
||||
@ -877,7 +1240,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
|
||||
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
|
||||
PARTITION_CONTEXT sl[8], sa[8];
|
||||
TOKENEXTRA *tp_orig = *tp;
|
||||
int i, p, pl;
|
||||
int i, pl;
|
||||
BLOCK_SIZE_TYPE subsize;
|
||||
int srate = INT_MAX, sdist = INT_MAX;
|
||||
|
||||
@ -889,19 +1252,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
|
||||
}
|
||||
assert(mi_height_log2(bsize) == mi_width_log2(bsize));
|
||||
|
||||
// buffer the above/left context information of the block in search.
|
||||
for (p = 0; p < MAX_MB_PLANE; ++p) {
|
||||
vpx_memcpy(a + bs * p, cm->above_context[p] +
|
||||
(mi_col * 2 >> xd->plane[p].subsampling_x),
|
||||
sizeof(ENTROPY_CONTEXT) * bs >> xd->plane[p].subsampling_x);
|
||||
vpx_memcpy(l + bs * p, cm->left_context[p] +
|
||||
((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
|
||||
sizeof(ENTROPY_CONTEXT) * bs >> xd->plane[p].subsampling_y);
|
||||
}
|
||||
vpx_memcpy(sa, cm->above_seg_context + mi_col,
|
||||
sizeof(PARTITION_CONTEXT) * ms);
|
||||
vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK),
|
||||
sizeof(PARTITION_CONTEXT) * ms);
|
||||
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
|
||||
|
||||
// PARTITION_SPLIT
|
||||
if (bsize >= BLOCK_SIZE_SB8X8) {
|
||||
@ -1029,6 +1380,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
|
||||
*rate = srate;
|
||||
*dist = sdist;
|
||||
|
||||
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
|
||||
|
||||
if (srate < INT_MAX && sdist < INT_MAX)
|
||||
encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize);
|
||||
|
||||
@ -1054,8 +1407,22 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row,
|
||||
for (mi_col = cm->cur_tile_mi_col_start;
|
||||
mi_col < cm->cur_tile_mi_col_end; mi_col += 8) {
|
||||
int dummy_rate, dummy_dist;
|
||||
rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
|
||||
&dummy_rate, &dummy_dist);
|
||||
// TODO(JBB): remove the border conditions for 64x64 blocks once it's fixed;
|
||||
// without this border check choose will fail on the border of every
|
||||
// non 64x64.
|
||||
if (cpi->speed < 5 ||
|
||||
mi_col + 8 > cm->cur_tile_mi_col_end ||
|
||||
mi_row + 8 > cm->cur_tile_mi_row_end) {
|
||||
rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
|
||||
&dummy_rate, &dummy_dist);
|
||||
} else {
|
||||
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
|
||||
MODE_INFO *m = cm->mi + idx_str;
|
||||
// set_partitioning(cpi, m, BLOCK_SIZE_SB8X8);
|
||||
choose_partitioning(cpi, cm->mi, mi_row, mi_col);
|
||||
rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
|
||||
&dummy_rate, &dummy_dist);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -696,6 +696,25 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
|
||||
sf->thresh_mult[THR_COMP_SPLITGA ] += speed_multiplier * 4500;
|
||||
sf->thresh_mult[THR_COMP_SPLITLG ] += speed_multiplier * 4500;
|
||||
|
||||
if (speed > 4) {
|
||||
for (i = 0; i < MAX_MODES; ++i)
|
||||
sf->thresh_mult[i] = INT_MAX;
|
||||
|
||||
sf->thresh_mult[THR_DC ] = 0;
|
||||
sf->thresh_mult[THR_TM ] = 0;
|
||||
sf->thresh_mult[THR_NEWMV ] = 4000;
|
||||
sf->thresh_mult[THR_NEWG ] = 4000;
|
||||
sf->thresh_mult[THR_NEWA ] = 4000;
|
||||
sf->thresh_mult[THR_NEARESTMV] = 0;
|
||||
sf->thresh_mult[THR_NEARESTG ] = 0;
|
||||
sf->thresh_mult[THR_NEARESTA ] = 0;
|
||||
sf->thresh_mult[THR_NEARMV ] = 2000;
|
||||
sf->thresh_mult[THR_NEARG ] = 2000;
|
||||
sf->thresh_mult[THR_NEARA ] = 2000;
|
||||
sf->thresh_mult[THR_COMP_NEARESTLA] = 2000;
|
||||
sf->recode_loop = 0;
|
||||
}
|
||||
|
||||
/* disable frame modes if flags not set */
|
||||
if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
|
||||
sf->thresh_mult[THR_NEWMV ] = INT_MAX;
|
||||
@ -804,48 +823,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
||||
#endif
|
||||
#endif
|
||||
sf->mb16_breakout = 0;
|
||||
|
||||
if (speed > 0) {
|
||||
/* Disable coefficient optimization above speed 0 */
|
||||
sf->optimize_coefficients = 0;
|
||||
sf->no_skip_block4x4_search = 0;
|
||||
sf->comp_inter_joint_search = 0;
|
||||
|
||||
sf->first_step = 1;
|
||||
|
||||
cpi->mode_check_freq[THR_SPLITG] = 2;
|
||||
cpi->mode_check_freq[THR_SPLITA] = 2;
|
||||
cpi->mode_check_freq[THR_SPLITMV] = 0;
|
||||
|
||||
cpi->mode_check_freq[THR_COMP_SPLITGA] = 2;
|
||||
cpi->mode_check_freq[THR_COMP_SPLITLG] = 2;
|
||||
cpi->mode_check_freq[THR_COMP_SPLITLA] = 0;
|
||||
}
|
||||
|
||||
if (speed > 1) {
|
||||
cpi->mode_check_freq[THR_SPLITG] = 4;
|
||||
cpi->mode_check_freq[THR_SPLITA] = 4;
|
||||
cpi->mode_check_freq[THR_SPLITMV] = 2;
|
||||
|
||||
cpi->mode_check_freq[THR_COMP_SPLITGA] = 4;
|
||||
cpi->mode_check_freq[THR_COMP_SPLITLG] = 4;
|
||||
cpi->mode_check_freq[THR_COMP_SPLITLA] = 2;
|
||||
}
|
||||
|
||||
if (speed > 2) {
|
||||
cpi->mode_check_freq[THR_SPLITG] = 15;
|
||||
cpi->mode_check_freq[THR_SPLITA] = 15;
|
||||
cpi->mode_check_freq[THR_SPLITMV] = 7;
|
||||
|
||||
cpi->mode_check_freq[THR_COMP_SPLITGA] = 15;
|
||||
cpi->mode_check_freq[THR_COMP_SPLITLG] = 15;
|
||||
cpi->mode_check_freq[THR_COMP_SPLITLA] = 7;
|
||||
|
||||
// Only do recode loop on key frames, golden frames and
|
||||
// alt ref frames
|
||||
sf->recode_loop = 2;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
}; /* switch */
|
||||
|
@ -623,9 +623,25 @@ static void super_block_yrd(VP9_COMP *cpi,
|
||||
int64_t txfm_cache[NB_TXFM_MODES]) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
|
||||
MACROBLOCKD *xd = &x->e_mbd;
|
||||
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
|
||||
|
||||
vp9_subtract_sby(x, bs);
|
||||
|
||||
if (cpi->speed > 4) {
|
||||
if (bs >= BLOCK_SIZE_SB32X32) {
|
||||
mbmi->txfm_size = TX_32X32;
|
||||
} else if (bs >= BLOCK_SIZE_MB16X16) {
|
||||
mbmi->txfm_size = TX_16X16;
|
||||
} else if (bs >= BLOCK_SIZE_SB8X8) {
|
||||
mbmi->txfm_size = TX_8X8;
|
||||
} else {
|
||||
mbmi->txfm_size = TX_4X4;
|
||||
}
|
||||
super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
|
||||
mbmi->txfm_size);
|
||||
return;
|
||||
}
|
||||
if (bs >= BLOCK_SIZE_SB32X32)
|
||||
super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
|
||||
bs, TX_32X32);
|
||||
@ -845,7 +861,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int64_t txfm_cache[NB_TXFM_MODES]) {
|
||||
MB_PREDICTION_MODE mode;
|
||||
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
|
||||
MACROBLOCKD *xd = &x->e_mbd;
|
||||
MACROBLOCKD *const xd = &x->e_mbd;
|
||||
int this_rate, this_rate_tokenonly;
|
||||
int this_distortion, s;
|
||||
int64_t best_rd = INT64_MAX, this_rd;
|
||||
@ -866,7 +882,6 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int64_t local_txfm_cache[NB_TXFM_MODES];
|
||||
MODE_INFO *const mic = xd->mode_info_context;
|
||||
const int mis = xd->mode_info_stride;
|
||||
|
||||
if (cpi->common.frame_type == KEY_FRAME) {
|
||||
const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
|
||||
const MB_PREDICTION_MODE L = xd->left_available ?
|
||||
@ -874,12 +889,12 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
|
||||
bmode_costs = x->y_mode_costs[A][L];
|
||||
}
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.mode = mode;
|
||||
vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize);
|
||||
|
||||
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
|
||||
bsize, local_txfm_cache);
|
||||
|
||||
this_rate = this_rate_tokenonly + bmode_costs[mode];
|
||||
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
|
||||
|
||||
@ -2277,7 +2292,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
(mbmi->mv[1].as_mv.col & 15) == 0;
|
||||
// Search for best switchable filter by checking the variance of
|
||||
// pred error irrespective of whether the filter will be used
|
||||
if (1) {
|
||||
if (cpi->speed > 4) {
|
||||
*best_filter = EIGHTTAP;
|
||||
} else {
|
||||
int i, newbest;
|
||||
int tmp_rate_sum = 0, tmp_dist_sum = 0;
|
||||
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
|
||||
@ -2414,6 +2431,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
// Y cost and distortion
|
||||
super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y,
|
||||
bsize, txfm_cache);
|
||||
|
||||
*rate2 += *rate_y;
|
||||
*distortion += *distortion_y;
|
||||
|
||||
|
@ -318,6 +318,11 @@ unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
}
|
||||
|
||||
// C implementation of vp9_get_sse_sum_8x8: forwards to variance() for an
// 8x8 block, passing the caller's sse and sum pointers straight through.
//   src_ptr, source_stride - first block and its stride
//   ref_ptr, ref_stride    - second block and its stride
//   sse, sum               - output pointers handed to variance()
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int ref_stride,
                           unsigned int *sse, int *sum) {
  const int block_w = 8;
  const int block_h = 8;

  variance(src_ptr, source_stride, ref_ptr, ref_stride, block_w, block_h,
           sse, sum);
}
|
||||
|
||||
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
|
||||
int source_stride,
|
||||
|
Loading…
x
Reference in New Issue
Block a user