Merge "Creates a new speed 1:" into experimental

This commit is contained in:
Jim Bankoski 2013-05-30 20:36:05 -07:00 committed by Gerrit Code Review
commit 21595f8e38
5 changed files with 433 additions and 63 deletions

View File

@ -208,7 +208,6 @@ specialize vp9_short_iht16x16_add
prototype void vp9_idct4_1d "int16_t *input, int16_t *output"
specialize vp9_idct4_1d sse2
# dct and add
prototype void vp9_dc_only_idct_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride"
@ -265,6 +264,10 @@ specialize vp9_variance8x16 mmx sse2
prototype unsigned int vp9_variance8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance8x8 mmx sse2
prototype void vp9_get_sse_sum_8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"
specialize vp9_get_sse_sum_8x8 sse2
vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2
prototype unsigned int vp9_variance8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance8x4 sse2

View File

@ -10,6 +10,7 @@
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
@ -97,6 +98,8 @@ static unsigned int alt_activity_measure(VP9_COMP *cpi,
return vp9_encode_intra(cpi, x, use_dc_pred);
}
DECLARE_ALIGNED(16, static const uint8_t, vp9_64x64_zeros[64*64]) = { 0 };
// Measure the activity of the current macroblock
// What we measure here is TBD so abstracted to this function
@ -769,6 +772,35 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
vpx_memcpy(cm->left_seg_context + (mi_row & MI_MASK), sl,
sizeof(PARTITION_CONTEXT) * mh);
}
// Snapshots the entropy and partition contexts that cover the block at
// (mi_row, mi_col) into caller-provided buffers.
//   a, l   - receive the above / left entropy contexts. Plane p is stored
//            starting at offset ctx_w * p (resp. ctx_h * p), where
//            ctx_w = 1 << b_width_log2(bsize) and
//            ctx_h = 1 << b_height_log2(bsize).
//   sa, sl - receive the above / left partition contexts
//            (1 << mi_width_log2(bsize) and 1 << mi_height_log2(bsize)
//            entries respectively).
static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
                         ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                         ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                         PARTITION_CONTEXT sa[8],
                         PARTITION_CONTEXT sl[8],
                         BLOCK_SIZE_TYPE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
  const int ctx_w = 1 << b_width_log2(bsize);
  const int ctx_h = 1 << b_height_log2(bsize);
  const int seg_w = 1 << mi_width_log2(bsize);
  const int seg_h = 1 << mi_height_log2(bsize);
  const int left_row = mi_row & MI_MASK;
  int plane;

  // Entropy contexts: one copy per plane, with both the source offset and the
  // byte count shifted down by that plane's subsampling.
  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    const int ss_x = xd->plane[plane].subsampling_x;
    const int ss_y = xd->plane[plane].subsampling_y;

    vpx_memcpy(a + ctx_w * plane,
               cm->above_context[plane] + ((mi_col * 2) >> ss_x),
               (sizeof(ENTROPY_CONTEXT) * ctx_w) >> ss_x);
    vpx_memcpy(l + ctx_h * plane,
               cm->left_context[plane] + ((left_row * 2) >> ss_y),
               (sizeof(ENTROPY_CONTEXT) * ctx_h) >> ss_y);
  }

  // Partition contexts are not per-plane.
  vpx_memcpy(sa, cm->above_seg_context + mi_col,
             sizeof(PARTITION_CONTEXT) * seg_w);
  vpx_memcpy(sl, cm->left_seg_context + left_row,
             sizeof(PARTITION_CONTEXT) * seg_h);
}
static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp,
int mi_row, int mi_col, int output_enabled,
@ -861,6 +893,337 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
}
}
// Test helper: stamps bsize into the sb_type of every entry of the 8x8 grid
// of mode info entries starting at m (row stride cm->mode_info_stride).
//
// The previous implementation walked the grid in steps of
// (1 << b_width_log2(bsize)) / 2. That step is 0 whenever b_width_log2(bsize)
// is 0, so the outer loops never advanced and the function never returned.
// Every entry receives the same value regardless of the step, so a flat walk
// over the grid is equivalent for all non-zero steps and always terminates.
static void set_partitioning(VP9_COMP *cpi, MODE_INFO *m,
                             BLOCK_SIZE_TYPE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  const int mis = cm->mode_info_stride;
  int row, col;

  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      m[row * mis + col].mbmi.sb_type = bsize;
    }
  }
}
// Stamps bsize into the sb_type of the square group of mode info entries
// whose top-left corner is (mi_row, mi_col).
//   cm     - supplies the frame limits mi_rows / mi_cols.
//   m      - base of the mode info array; indexed as m[row * mis + col].
//   mis    - row stride of the mode info array.
// The side of the square is (1 << b_width_log2(bsize)) / 2 entries.
//
// Entries that fall outside the frame are skipped individually. The previous
// version returned as soon as it met the first out-of-frame entry, which left
// every following row unset even when its leading columns were inside the
// frame (e.g. a block that straddles the right frame edge).
//
// NOTE(review): the side length is derived from the width only, so for sizes
// that are taller than wide only part of the block's area is stamped; confirm
// that callers only read the top-left entry.
static void set_block_size(VP9_COMMON *const cm,
                           MODE_INFO *m, BLOCK_SIZE_TYPE bsize, int mis,
                           int mi_row, int mi_col) {
  const int bs = (1 << b_width_log2(bsize)) / 2;
  MODE_INFO *const m2 = m + mi_row * mis + mi_col;
  int row, col;

  for (row = 0; row < bs; row++) {
    // Rows only increase, so nothing below the frame can be valid.
    if (mi_row + row >= cm->mi_rows)
      break;
    for (col = 0; col < bs; col++) {
      // Stop this row at the right frame edge but keep processing later rows.
      if (mi_col + col >= cm->mi_cols)
        break;
      m2[row * mis + col].mbmi.sb_type = bsize;
    }
  }
}
// Error statistics for one region of the variance tree. All four fields are
// written together by fill_variance().
typedef struct {
// Sum of squared errors accumulated over the region.
int64_t sum_square_error;
// Sum of (signed) errors accumulated over the region.
int64_t sum_error;
// Number of samples the two sums were accumulated over.
int count;
// 256 * (sum_square_error - sum_error^2 / count) / count, using integer
// division, as computed by fill_variance().
int variance;
} var;
// Declares one level of the variance tree as a struct named TYPE:
//   none    - statistics for the whole node,
//   horz[2] - statistics for two halves (filled from split pairs 0+1 and 2+3
//             by fill_variance_tree),
//   vert[2] - statistics for two halves (filled from split pairs 0+2 and 1+3
//             by fill_variance_tree),
//   split[4]- the four children, each of type BLOCKSIZE.
// The instantiations below nest the levels: a v64x64 holds four v32x32, each
// of which holds four v16x16, each of which holds four v8x8. The children of
// a v8x8 are plain var entries.
#define VT(TYPE, BLOCKSIZE) \
typedef struct { \
var none; \
var horz[2]; \
var vert[2]; \
BLOCKSIZE split[4]; } TYPE;
VT(v8x8, var)
VT(v16x16, v8x8)
VT(v32x32, v16x16)
VT(v64x64, v32x32)
// Names for the 16x16, 32x32 and 64x64 levels of the variance tree.
// NOTE(review): not referenced anywhere in the visible part of this change;
// confirm whether it is still needed.
typedef enum {
V16X16,
V32X32,
V64X64,
} TREE_LEVEL;
// Stores the raw sums in *v and derives the variance field from them.
//   s2 - sum of squared errors
//   s  - sum of errors
//   c  - sample count (used as a divisor, so it must be non-zero)
// variance = 256 * (s2 - s * s / c) / c, evaluated with 64-bit integer
// arithmetic and then narrowed to int.
static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
  int64_t scaled;

  v->sum_square_error = s2;
  v->sum_error = s;
  v->count = c;

  scaled = 256 * (s2 - s * s / c);
  v->variance = scaled / c;
}
// Fills the `none` statistics of the four 8x8 children of a 16x16 tree node.
//   s, sp - source pixels and stride
//   d, dp - reference pixels and stride
//   vt    - node whose split[0..3].none entries are written
// Child k covers the 8x8 block at column offset (k & 1) * 8 and row offset
// (k >> 1) * 8. Each child is measured with vp9_get_sse_sum_8x8() and
// recorded with a sample count of 64.
static void fill_16x16_variance(const unsigned char *s, int sp,
                                const unsigned char *d, int dp, v16x16 *vt) {
  int k;

  for (k = 0; k < 4; ++k) {
    const int x_off = (k & 1) * 8;
    const int y_off = (k >> 1) * 8;
    unsigned int sse;
    int sum;

    vp9_get_sse_sum_8x8(s + y_off * sp + x_off, sp,
                        d + y_off * dp + x_off, dp, &sse, &sum);
    fill_variance(&vt->split[k].none, sse, sum, 64);
  }
}
// Combines two sets of statistics into *r: the sums of squared errors, the
// sums of errors and the counts of *a and *b are added together and the
// variance of the result is recomputed by fill_variance().
//   r    - destination
//   a, b - inputs; only read, hence const-qualified
// NOTE(review): this function has external linkage but no prototype is
// visible in this change; if nothing outside this file uses it, it should be
// made static.
void sum_2_variances(var *r, const var *a, const var *b) {
  fill_variance(r, a->sum_square_error + b->sum_square_error,
                a->sum_error + b->sum_error, a->count + b->count);
}
// Fills one level of the variance tree from its four children:
//   horz[0] = split[0] + split[1]    horz[1] = split[2] + split[3]
//   vert[0] = split[0] + split[2]    vert[1] = split[1] + split[3]
//   none    = vert[0] + vert[1]
// (each "+" being sum_2_variances on the children's `none` entries).
//
// VT must be spelled as `&<lvalue>`, e.g. fill_variance_tree(&vt.split[i]).
// The macro pastes member accesses directly after VT, so `&x.horz[0]` parses
// as the address of the member. For that reason VT is intentionally NOT
// parenthesized in the expansion.
//
// The expansion is a single compound statement, so the macro is safe as the
// body of an unbraced if/for, and existing call sites that omit the trailing
// semicolon keep compiling.
#define fill_variance_tree(VT) \
  { \
    sum_2_variances(VT.horz[0], VT.split[0].none, VT.split[1].none); \
    sum_2_variances(VT.horz[1], VT.split[2].none, VT.split[3].none); \
    sum_2_variances(VT.vert[0], VT.split[0].none, VT.split[2].none); \
    sum_2_variances(VT.vert[1], VT.split[1].none, VT.split[3].none); \
    sum_2_variances(VT.none, VT.vert[0], VT.vert[1]); \
  }
// Picks the largest shape of variance tree node VT whose variance is below
// `threshold` and records it with set_block_size() at mode info position
// (R, C), then executes ACTION. Shapes are tried in the order: whole block
// (BLOCKSIZE), two horizontal halves, two vertical halves. A split shape
// qualifies only if both of its halves are below the threshold. If no shape
// qualifies, nothing is written and ACTION is not executed.
//
// Requirements on the expansion site:
//   - `threshold`, `cm`, `m` and `mis` must be in scope.
//   - VT is a tree node lvalue (not a pointer).
//   - ACTION is a statement without its semicolon, typically `return` or
//     `continue`.
//
// The shapes form an else-if chain, so at most one shape is recorded even if
// ACTION does not transfer control. The expansion is a plain compound
// statement rather than do { } while (0) on purpose: a `continue` passed as
// ACTION must apply to the caller's loop, not to a wrapper loop.
#define set_vt_size(VT, BLOCKSIZE, R, C, ACTION) \
  { \
    if ((VT).none.variance < threshold) { \
      set_block_size(cm, m, (BLOCKSIZE), mis, (R), (C)); \
      ACTION; \
    } else if ((VT).horz[0].variance < threshold && \
               (VT).horz[1].variance < threshold) { \
      set_block_size(cm, m, get_subsize((BLOCKSIZE), PARTITION_HORZ), mis, \
                     (R), (C)); \
      ACTION; \
    } else if ((VT).vert[0].variance < threshold && \
               (VT).vert[1].variance < threshold) { \
      set_block_size(cm, m, get_subsize((BLOCKSIZE), PARTITION_VERT), mis, \
                     (R), (C)); \
      ACTION; \
    } \
  }
// Chooses a partitioning for the 64x64 superblock at (mi_row, mi_col) from
// the variance of its source pixels, and records the chosen block sizes in
// the mode info array via set_block_size().
//   m - base of the mode info array (set_block_size() applies the
//       mi_row / mi_col offset itself).
//
// A tree of statistics is built bottom-up from 8x8 measurements. The tree is
// then walked top-down: at each level the largest shape whose variance is
// below the threshold is taken, otherwise the node is split further, ending
// at 8x8.
//
// Fix: the 32x32 and 16x16 levels previously tested the root node `vt`
// instead of the node for the quadrant being visited. Since the root had
// already failed the test by then, every superblock that was not kept whole
// or halved fell straight through to all-8x8. Each level now tests its own
// node.
static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
                                int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *x = &cpi->mb;
  const int mis = cm->mode_info_stride;
  // TODO(JBB): More experimentation or testing of this threshold;
  int64_t threshold = 4;
  int i, j, k;
  v64x64 vt;
  const unsigned char *s;
  int sp;
  // The source is measured against an all-zero block, so the statistics
  // describe the source pixels themselves.
  // TODO(review): the earlier code read xd->plane[0].pre before overriding it
  // with the zero block, and had a commented-out key-frame test around the
  // override; presumably non-key frames are meant to use the prediction
  // eventually. Confirm.
  const unsigned char *d = vp9_64x64_zeros;
  int dp = 64;

  set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64);
  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;

  // TODO(JBB): Clearly the higher the quantizer the fewer partitions we want
  // but this needs more experimentation.
  threshold = threshold * cpi->common.base_qindex * cpi->common.base_qindex;

  // Fill in the entire tree of 8x8 variances for splits.
  for (i = 0; i < 4; i++) {
    const int x32_idx = ((i & 1) << 5);
    const int y32_idx = ((i >> 1) << 5);
    for (j = 0; j < 4; j++) {
      const int x_idx = x32_idx + ((j & 1) << 4);
      const int y_idx = y32_idx + ((j >> 1) << 4);
      fill_16x16_variance(s + y_idx * sp + x_idx, sp, d + y_idx * dp + x_idx,
                          dp, &vt.split[i].split[j]);
    }
  }

  // Fill the rest of the variance tree by summing the split partition
  // values.
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      fill_variance_tree(&vt.split[i].split[j])
    }
    fill_variance_tree(&vt.split[i])
  }
  fill_variance_tree(&vt)

  // Now go through the entire structure, splitting every blocksize until
  // we get to one that's got a variance lower than our threshold, or we
  // hit 8x8.
  set_vt_size(vt, BLOCK_SIZE_SB64X64, mi_row, mi_col, return);
  for (i = 0; i < 4; ++i) {
    // Offsets below are in mode info units.
    const int x32_idx = ((i & 1) << 2);
    const int y32_idx = ((i >> 1) << 2);
    set_vt_size(vt.split[i], BLOCK_SIZE_SB32X32, mi_row + y32_idx,
                mi_col + x32_idx, continue);
    for (j = 0; j < 4; ++j) {
      const int x16_idx = ((j & 1) << 1);
      const int y16_idx = ((j >> 1) << 1);
      set_vt_size(vt.split[i].split[j], BLOCK_SIZE_MB16X16,
                  mi_row + y32_idx + y16_idx,
                  mi_col + x32_idx + x16_idx, continue);
      for (k = 0; k < 4; ++k) {
        const int x8_idx = (k & 1);
        const int y8_idx = (k >> 1);
        set_block_size(cm, m, BLOCK_SIZE_SB8X8, mis,
                       mi_row + y32_idx + y16_idx + y8_idx,
                       mi_col + x32_idx + x16_idx + x8_idx);
      }
    }
  }
}
// Picks modes for, and encodes, the block of size bsize at (mi_row, mi_col)
// using the partitioning already recorded in the mode info array instead of
// searching for one.
//   m          - mode info entry of the block's top-left corner; its
//                mbmi.sb_type determines the partition type used here.
//   tp         - token pointer, forwarded to pick_sb_modes / encode_sb.
//   rate, dist - receive the accumulated rate and distortion, including the
//                partition signalling cost.
// PARTITION_SPLIT recurses into the four quadrants. The entropy / partition
// contexts are saved on entry and restored before the final encode_sb call.
// If (mi_row, mi_col) lies outside the frame the function returns at once
// WITHOUT writing *rate or *dist.
static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize,
int *rate, int *dist) {
VP9_COMMON * const cm = &cpi->common;
MACROBLOCK * const x = &cpi->mb;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
const int mis = cm->mode_info_stride;
int bwl, bhl;
int bsl = b_width_log2(bsize);
// bs >> 2 is used below as the offset, in mode info units, from the block
// origin to its second half / its quadrants.
int bs = (1 << bsl);
// Same quantity as bs >> 2; used to step the mode info pointer m to a
// quadrant.
int bss = (1 << bsl)/4;
int i, pl;
PARTITION_TYPE partition;
BLOCK_SIZE_TYPE subsize;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
int r = 0, d = 0;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
bwl = b_width_log2(m->mbmi.sb_type);
bhl = b_height_log2(m->mbmi.sb_type);
// parse the partition type
// The recorded size is compared against bsl (the width log2 of bsize) in
// both dimensions.
// NOTE(review): if assertions are compiled out and none of the four cases
// matches, `partition` is left uninitialized.
if ((bwl == bsl) && (bhl == bsl))
partition = PARTITION_NONE;
else if ((bwl == bsl) && (bhl < bsl))
partition = PARTITION_HORZ;
else if ((bwl < bsl) && (bhl == bsl))
partition = PARTITION_VERT;
else if ((bwl < bsl) && (bhl < bsl))
partition = PARTITION_SPLIT;
else
assert(0);
subsize = get_subsize(bsize, partition);
// TODO(JBB): this restriction is here because pick_sb_modes can return
// r's that are INT_MAX meaning we can't select a mode / mv for this block.
// when the code is made to work for less than sb8x8 we need to come up with
// a solution to this problem.
assert(subsize >= BLOCK_SIZE_SB8X8);
if (bsize >= BLOCK_SIZE_SB8X8) {
xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
xd->above_seg_context = cm->above_seg_context + mi_col;
*(get_sb_partitioning(x, bsize)) = subsize;
}
pl = partition_plane_context(xd, bsize);
// Snapshot the contexts; they are put back at the end of the function.
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
switch (partition) {
case PARTITION_NONE:
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
get_block_context(x, bsize));
r += x->partition_cost[pl][PARTITION_NONE];
break;
case PARTITION_HORZ:
// First half at the block origin.
*(get_sb_index(xd, subsize)) = 0;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
get_block_context(x, subsize));
// The second half is only evaluated when this row bound holds. The first
// half's state is committed beforehand (update_state, then
// encode_superblock with 0 as its third argument) and the second half is
// then picked at mi_row + (bs >> 2).
if (mi_row + (bs >> 1) <= cm->mi_rows) {
int rt, dt;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
pick_sb_modes(cpi, mi_row + (bs >> 2), mi_col, tp, &rt, &dt, subsize,
get_block_context(x, subsize));
// NOTE(review): per the TODO above, pick_sb_modes may return INT_MAX; in
// that case this addition would overflow.
r += rt;
d += dt;
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
r += x->partition_cost[pl][PARTITION_HORZ];
break;
case PARTITION_VERT:
// Mirrors PARTITION_HORZ with the column bound and a column offset.
*(get_sb_index(xd, subsize)) = 0;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
get_block_context(x, subsize));
if (mi_col + (bs >> 1) <= cm->mi_cols) {
int rt, dt;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
pick_sb_modes(cpi, mi_row, mi_col + (bs >> 2), tp, &rt, &dt, subsize,
get_block_context(x, subsize));
r += rt;
d += dt;
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
r += x->partition_cost[pl][PARTITION_VERT];
// NOTE(review): only this case restores the contexts before leaving the
// switch; PARTITION_HORZ does not. The contexts are restored again after
// the switch either way, so confirm whether this call is needed.
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
break;
case PARTITION_SPLIT:
// Recurse into each quadrant whose origin lies inside the frame.
for (i = 0; i < 4; i++) {
int x_idx = (i & 1) * (bs >> 2);
int y_idx = (i >> 1) * (bs >> 2);
int jj = i >> 1, ii = i & 0x01;
int rt, dt;
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
*(get_sb_index(xd, subsize)) = i;
rd_use_partition(cpi, m + jj * bss * mis + ii * bss, tp, mi_row + y_idx,
mi_col + x_idx, subsize, &rt, &dt);
r += rt;
d += dt;
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
r += x->partition_cost[pl][PARTITION_SPLIT];
break;
default:
assert(0);
}
// update partition context
// The condition differs by build: with CONFIG_AB4X4 the update also applies
// to bsize == BLOCK_SIZE_SB8X8; without it only to sizes above 8x8.
#if CONFIG_AB4X4
if (bsize >= BLOCK_SIZE_SB8X8 &&
(bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) {
#else
if (bsize > BLOCK_SIZE_SB8X8
&& (bsize == BLOCK_SIZE_MB16X16 || partition != PARTITION_SPLIT)) {
#endif
set_partition_seg_context(cm, xd, mi_row, mi_col);
update_partition_context(xd, subsize, bsize);
}
// Put back the contexts captured by save_context above.
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
// Encode only when both totals are below INT_MAX; the fifth argument is
// true only for the top-level 64x64 call.
if (r < INT_MAX && d < INT_MAX)
encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize);
*rate = r;
*dist = d;
}
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previously rate-distortion optimization
@ -877,7 +1240,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
TOKENEXTRA *tp_orig = *tp;
int i, p, pl;
int i, pl;
BLOCK_SIZE_TYPE subsize;
int srate = INT_MAX, sdist = INT_MAX;
@ -889,19 +1252,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
}
assert(mi_height_log2(bsize) == mi_width_log2(bsize));
// buffer the above/left context information of the block in search.
for (p = 0; p < MAX_MB_PLANE; ++p) {
vpx_memcpy(a + bs * p, cm->above_context[p] +
(mi_col * 2 >> xd->plane[p].subsampling_x),
sizeof(ENTROPY_CONTEXT) * bs >> xd->plane[p].subsampling_x);
vpx_memcpy(l + bs * p, cm->left_context[p] +
((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
sizeof(ENTROPY_CONTEXT) * bs >> xd->plane[p].subsampling_y);
}
vpx_memcpy(sa, cm->above_seg_context + mi_col,
sizeof(PARTITION_CONTEXT) * ms);
vpx_memcpy(sl, cm->left_seg_context + (mi_row & MI_MASK),
sizeof(PARTITION_CONTEXT) * ms);
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
// PARTITION_SPLIT
if (bsize >= BLOCK_SIZE_SB8X8) {
@ -1029,6 +1380,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
*rate = srate;
*dist = sdist;
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
if (srate < INT_MAX && sdist < INT_MAX)
encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_SIZE_SB64X64, bsize);
@ -1054,8 +1407,22 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row,
for (mi_col = cm->cur_tile_mi_col_start;
mi_col < cm->cur_tile_mi_col_end; mi_col += 8) {
int dummy_rate, dummy_dist;
rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
&dummy_rate, &dummy_dist);
// TODO(JBB): remove the border conditions for 64x64 blocks once it's fixed.
// Without this border check, choose_partitioning will fail on every border
// superblock that is not a full 64x64.
if (cpi->speed < 5 ||
mi_col + 8 > cm->cur_tile_mi_col_end ||
mi_row + 8 > cm->cur_tile_mi_row_end) {
rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
&dummy_rate, &dummy_dist);
} else {
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
MODE_INFO *m = cm->mi + idx_str;
// set_partitioning(cpi, m, BLOCK_SIZE_SB8X8);
choose_partitioning(cpi, cm->mi, mi_row, mi_col);
rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64,
&dummy_rate, &dummy_dist);
}
}
}

View File

@ -696,6 +696,25 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
sf->thresh_mult[THR_COMP_SPLITGA ] += speed_multiplier * 4500;
sf->thresh_mult[THR_COMP_SPLITLG ] += speed_multiplier * 4500;
if (speed > 4) {
for (i = 0; i < MAX_MODES; ++i)
sf->thresh_mult[i] = INT_MAX;
sf->thresh_mult[THR_DC ] = 0;
sf->thresh_mult[THR_TM ] = 0;
sf->thresh_mult[THR_NEWMV ] = 4000;
sf->thresh_mult[THR_NEWG ] = 4000;
sf->thresh_mult[THR_NEWA ] = 4000;
sf->thresh_mult[THR_NEARESTMV] = 0;
sf->thresh_mult[THR_NEARESTG ] = 0;
sf->thresh_mult[THR_NEARESTA ] = 0;
sf->thresh_mult[THR_NEARMV ] = 2000;
sf->thresh_mult[THR_NEARG ] = 2000;
sf->thresh_mult[THR_NEARA ] = 2000;
sf->thresh_mult[THR_COMP_NEARESTLA] = 2000;
sf->recode_loop = 0;
}
/* disable frame modes if flags not set */
if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
sf->thresh_mult[THR_NEWMV ] = INT_MAX;
@ -804,48 +823,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
#endif
#endif
sf->mb16_breakout = 0;
if (speed > 0) {
/* Disable coefficient optimization above speed 0 */
sf->optimize_coefficients = 0;
sf->no_skip_block4x4_search = 0;
sf->comp_inter_joint_search = 0;
sf->first_step = 1;
cpi->mode_check_freq[THR_SPLITG] = 2;
cpi->mode_check_freq[THR_SPLITA] = 2;
cpi->mode_check_freq[THR_SPLITMV] = 0;
cpi->mode_check_freq[THR_COMP_SPLITGA] = 2;
cpi->mode_check_freq[THR_COMP_SPLITLG] = 2;
cpi->mode_check_freq[THR_COMP_SPLITLA] = 0;
}
if (speed > 1) {
cpi->mode_check_freq[THR_SPLITG] = 4;
cpi->mode_check_freq[THR_SPLITA] = 4;
cpi->mode_check_freq[THR_SPLITMV] = 2;
cpi->mode_check_freq[THR_COMP_SPLITGA] = 4;
cpi->mode_check_freq[THR_COMP_SPLITLG] = 4;
cpi->mode_check_freq[THR_COMP_SPLITLA] = 2;
}
if (speed > 2) {
cpi->mode_check_freq[THR_SPLITG] = 15;
cpi->mode_check_freq[THR_SPLITA] = 15;
cpi->mode_check_freq[THR_SPLITMV] = 7;
cpi->mode_check_freq[THR_COMP_SPLITGA] = 15;
cpi->mode_check_freq[THR_COMP_SPLITLG] = 15;
cpi->mode_check_freq[THR_COMP_SPLITLA] = 7;
// Only do recode loop on key frames, golden frames and
// alt ref frames
sf->recode_loop = 2;
}
break;
}; /* switch */

View File

@ -623,9 +623,25 @@ static void super_block_yrd(VP9_COMP *cpi,
int64_t txfm_cache[NB_TXFM_MODES]) {
VP9_COMMON *const cm = &cpi->common;
int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
vp9_subtract_sby(x, bs);
if (cpi->speed > 4) {
if (bs >= BLOCK_SIZE_SB32X32) {
mbmi->txfm_size = TX_32X32;
} else if (bs >= BLOCK_SIZE_MB16X16) {
mbmi->txfm_size = TX_16X16;
} else if (bs >= BLOCK_SIZE_SB8X8) {
mbmi->txfm_size = TX_8X8;
} else {
mbmi->txfm_size = TX_4X4;
}
super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
mbmi->txfm_size);
return;
}
if (bs >= BLOCK_SIZE_SB32X32)
super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
bs, TX_32X32);
@ -845,7 +861,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t txfm_cache[NB_TXFM_MODES]) {
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
MACROBLOCKD *xd = &x->e_mbd;
MACROBLOCKD *const xd = &x->e_mbd;
int this_rate, this_rate_tokenonly;
int this_distortion, s;
int64_t best_rd = INT64_MAX, this_rd;
@ -866,7 +882,6 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t local_txfm_cache[NB_TXFM_MODES];
MODE_INFO *const mic = xd->mode_info_context;
const int mis = xd->mode_info_stride;
if (cpi->common.frame_type == KEY_FRAME) {
const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
const MB_PREDICTION_MODE L = xd->left_available ?
@ -874,12 +889,12 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
bmode_costs = x->y_mode_costs[A][L];
}
x->e_mbd.mode_info_context->mbmi.mode = mode;
vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize);
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
bsize, local_txfm_cache);
this_rate = this_rate_tokenonly + bmode_costs[mode];
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
@ -2277,7 +2292,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
(mbmi->mv[1].as_mv.col & 15) == 0;
// Search for best switchable filter by checking the variance of
// pred error irrespective of whether the filter will be used
if (1) {
if (cpi->speed > 4) {
*best_filter = EIGHTTAP;
} else {
int i, newbest;
int tmp_rate_sum = 0, tmp_dist_sum = 0;
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
@ -2414,6 +2431,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Y cost and distortion
super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y,
bsize, txfm_cache);
*rate2 += *rate_y;
*distortion += *distortion_y;

View File

@ -318,6 +318,11 @@ unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
return (var - (((unsigned int)avg * avg) >> 7));
}
// C implementation of vp9_get_sse_sum_8x8: forwards to variance() with 8 as
// both its fifth and sixth arguments (presumably the block width and height),
// so *sse and *sum are whatever variance() writes for that block.
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int ref_stride,
                           unsigned int *sse, int *sum) {
  const int block_dim = 8;

  variance(src_ptr, source_stride, ref_ptr, ref_stride,
           block_dim, block_dim, sse, sum);
}
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
int source_stride,