Make the use of pred buffers consistent in MB/SB

Use in-place buffers (dst of MACROBLOCKD) for macroblock prediction.
This makes the macroblock buffer handling consistent with that of
the superblock. Remove the predictor buffer from MACROBLOCKD.

Change-Id: Id1bcd898961097b1e6230c10f0130753a59fc6df
This commit is contained in:
Jingning Han 2013-04-15 09:31:27 -07:00
parent 38f6232118
commit 6f43ff5824
16 changed files with 212 additions and 335 deletions

View File

@ -290,7 +290,6 @@ typedef struct {
} MODE_INFO; } MODE_INFO;
typedef struct blockd { typedef struct blockd {
uint8_t *predictor;
int16_t *diff; int16_t *diff;
int16_t *dequant; int16_t *dequant;
@ -354,7 +353,6 @@ struct mb_plane {
typedef struct macroblockd { typedef struct macroblockd {
DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */ DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */
DECLARE_ALIGNED(16, uint8_t, predictor[384]); // unused for superblocks
#if CONFIG_CODE_NONZEROCOUNT #if CONFIG_CODE_NONZEROCOUNT
DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]); DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]);
#endif #endif

View File

@ -78,7 +78,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) {
const int to = r * 4 + c; const int to = r * 4 + c;
const int from = r * 4 * 16 + c * 4; const int from = r * 4 * 16 + c * 4;
blockd[to].diff = &mb->diff[from]; blockd[to].diff = &mb->diff[from];
blockd[to].predictor = &mb->predictor[from];
} }
} }
@ -87,7 +86,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) {
const int to = 16 + r * 2 + c; const int to = 16 + r * 2 + c;
const int from = 256 + r * 4 * 8 + c * 4; const int from = 256 + r * 4 * 8 + c * 4;
blockd[to].diff = &mb->diff[from]; blockd[to].diff = &mb->diff[from];
blockd[to].predictor = &mb->predictor[from];
} }
} }
@ -96,7 +94,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) {
const int to = 20 + r * 2 + c; const int to = 20 + r * 2 + c;
const int from = 320 + r * 4 * 8 + c * 4; const int from = 320 + r * 4 * 8 + c * 4;
blockd[to].diff = &mb->diff[from]; blockd[to].diff = &mb->diff[from];
blockd[to].predictor = &mb->predictor[from];
} }
} }

View File

@ -32,22 +32,22 @@ static INLINE void recon(int rows, int cols,
void vp9_recon_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, void vp9_recon_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr,
int stride) { int stride) {
recon(4, 4, pred_ptr, 16, diff_ptr, 16, dst_ptr, stride); recon(4, 4, pred_ptr, stride, diff_ptr, 16, dst_ptr, stride);
} }
void vp9_recon_uv_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, void vp9_recon_uv_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr,
int stride) { int stride) {
recon(4, 4, pred_ptr, 8, diff_ptr, 8, dst_ptr, stride); recon(4, 4, pred_ptr, stride, diff_ptr, 8, dst_ptr, stride);
} }
void vp9_recon4b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, void vp9_recon4b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr,
int stride) { int stride) {
recon(4, 16, pred_ptr, 16, diff_ptr, 16, dst_ptr, stride); recon(4, 16, pred_ptr, stride, diff_ptr, 16, dst_ptr, stride);
} }
void vp9_recon2b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, void vp9_recon2b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr,
int stride) { int stride) {
recon(4, 8, pred_ptr, 8, diff_ptr, 8, dst_ptr, stride); recon(4, 8, pred_ptr, stride, diff_ptr, 8, dst_ptr, stride);
} }
void vp9_recon_sby_s_c(MACROBLOCKD *mb, uint8_t *dst, void vp9_recon_sby_s_c(MACROBLOCKD *mb, uint8_t *dst,
@ -95,7 +95,8 @@ void vp9_recon_mby_c(MACROBLOCKD *xd) {
for (i = 0; i < 16; i += 4) { for (i = 0; i < 16; i += 4) {
BLOCKD *b = &xd->block[i]; BLOCKD *b = &xd->block[i];
vp9_recon4b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); vp9_recon4b(*(b->base_dst) + b->dst, b->diff,
*(b->base_dst) + b->dst, b->dst_stride);
} }
} }
@ -104,13 +105,13 @@ void vp9_recon_mb_c(MACROBLOCKD *xd) {
for (i = 0; i < 16; i += 4) { for (i = 0; i < 16; i += 4) {
BLOCKD *b = &xd->block[i]; BLOCKD *b = &xd->block[i];
vp9_recon4b(*(b->base_dst) + b->dst, b->diff,
vp9_recon4b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); *(b->base_dst) + b->dst, b->dst_stride);
} }
for (i = 16; i < 24; i += 2) { for (i = 16; i < 24; i += 2) {
BLOCKD *b = &xd->block[i]; BLOCKD *b = &xd->block[i];
vp9_recon2b(*(b->base_dst) + b->dst, b->diff,
vp9_recon2b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); *(b->base_dst) + b->dst, b->dst_stride);
} }
} }

View File

@ -399,7 +399,7 @@ static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1,
int row, int col) { int row, int col) {
struct scale_factors * scale = &s[which_mv]; struct scale_factors * scale = &s[which_mv];
assert(d1->predictor - d0->predictor == block_size); assert(d1->dst - d0->dst == block_size);
assert(d1->pre == d0->pre + block_size); assert(d1->pre == d0->pre + block_size);
scale->set_scaled_offsets(scale, row, col); scale->set_scaled_offsets(scale, row, col);
@ -446,11 +446,11 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
int block_size, int stride, int block_size, int stride,
int which_mv, int weight, int which_mv, int weight,
const struct subpix_fn_table *subpix, const struct subpix_fn_table *subpix,
int row, int col, int use_dst) { int row, int col) {
uint8_t *d0_predictor = use_dst ? *(d0->base_dst) + d0->dst : d0->predictor; uint8_t *d0_predictor = *(d0->base_dst) + d0->dst;
uint8_t *d1_predictor = use_dst ? *(d1->base_dst) + d1->dst : d1->predictor; uint8_t *d1_predictor = *(d1->base_dst) + d1->dst;
struct scale_factors * scale = &s[which_mv]; struct scale_factors * scale = &s[which_mv];
stride = use_dst ? d0->dst_stride : stride; stride = d0->dst_stride;
assert(d1_predictor - d0_predictor == block_size); assert(d1_predictor - d0_predictor == block_size);
assert(d1->pre == d0->pre + block_size); assert(d1->pre == d0->pre + block_size);
@ -1338,8 +1338,7 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *mb,
} }
static void build_inter4x4_predictors_mb(MACROBLOCKD *xd, static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
int mb_row, int mb_col, int mb_row, int mb_col) {
int use_dst) {
int i; int i;
MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
BLOCKD *blockd = xd->block; BLOCKD *blockd = xd->block;
@ -1368,8 +1367,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, which_mv, build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, which_mv,
which_mv ? weight : 0, which_mv ? weight : 0,
&xd->subpix, mb_row * 16 + y, mb_col * 16, &xd->subpix, mb_row * 16 + y, mb_col * 16);
use_dst);
} }
} }
} else { } else {
@ -1386,8 +1384,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, which_mv, build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, which_mv,
which_mv ? weight : 0, which_mv ? weight : 0,
&xd->subpix, &xd->subpix,
mb_row * 16 + y, mb_col * 16 + x, mb_row * 16 + y, mb_col * 16 + x);
use_dst);
} }
} }
} }
@ -1405,8 +1402,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
which_mv ? weight : 0, &xd->subpix, which_mv ? weight : 0, &xd->subpix,
mb_row * 8 + y, mb_col * 8 + x, mb_row * 8 + y, mb_col * 8 + x);
use_dst);
} }
} }
} }
@ -1493,58 +1489,17 @@ static void build_4x4uvmvs(MACROBLOCKD *xd) {
} }
} }
void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd,
uint8_t *dst_y,
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
int dst_uvstride,
int mb_row,
int mb_col) {
vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride, mb_row, mb_col);
vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride,
mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_16x16_predictors_mb(xd, dst_y, dst_u, dst_v,
dst_ystride, dst_uvstride);
}
#endif
}
void vp9_build_inter_predictors_mb(MACROBLOCKD *xd, void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
int mb_row, int mb_row,
int mb_col) { int mb_col) {
if (xd->mode_info_context->mbmi.mode != SPLITMV) { if (xd->mode_info_context->mbmi.mode != SPLITMV) {
// TODO(jingning): to be replaced with vp9_build_inter_predictors_sb() when vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16);
// converting buffers from predictors to dst.
vp9_build_inter16x16_predictors_mb(xd, xd->predictor,
&xd->predictor[256],
&xd->predictor[320], 16, 8,
mb_row, mb_col);
} else { } else {
build_4x4uvmvs(xd); build_4x4uvmvs(xd);
build_inter4x4_predictors_mb(xd, mb_row, mb_col, 0); build_inter4x4_predictors_mb(xd, mb_row, mb_col);
} }
} }
void vp9_build_inter_predictors_mb_s(MACROBLOCKD *xd,
int mb_row,
int mb_col) {
if (xd->mode_info_context->mbmi.mode != SPLITMV) {
vp9_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
xd->dst.uv_stride,
mb_row, mb_col);
} else {
build_4x4uvmvs(xd);
build_inter4x4_predictors_mb(xd, mb_row, mb_col, 1);
}
}
/*encoder only*/ /*encoder only*/
void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
int mb_row, int mb_col) { int mb_row, int mb_col) {
@ -1593,8 +1548,7 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv, build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
which_mv ? weight : 0, which_mv ? weight : 0,
&xd->subpix, mb_row * 8 + y, mb_col * 8 + x, &xd->subpix, mb_row * 8 + y, mb_col * 8 + x);
0);
} }
} }
} }

View File

@ -29,14 +29,20 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
int mb_row, int mb_row,
int mb_col); int mb_col);
void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd, void vp9_build_inter_predictors_sby(MACROBLOCKD *x,
uint8_t *dst_y, uint8_t *dst_y,
uint8_t *dst_u, int dst_ystride,
uint8_t *dst_v, int mb_row,
int dst_ystride, int mb_col,
int dst_uvstride, BLOCK_SIZE_TYPE bsize);
int mb_row,
int mb_col); void vp9_build_inter_predictors_sbuv(MACROBLOCKD *x,
uint8_t *dst_u,
uint8_t *dst_v,
int dst_uvstride,
int mb_row,
int mb_col,
BLOCK_SIZE_TYPE bsize);
void vp9_build_inter_predictors_sb(MACROBLOCKD *mb, void vp9_build_inter_predictors_sb(MACROBLOCKD *mb,
int mb_row, int mb_col, int mb_row, int mb_col,
@ -46,10 +52,6 @@ void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
int mb_row, int mb_row,
int mb_col); int mb_col);
void vp9_build_inter_predictors_mb_s(MACROBLOCKD *xd,
int mb_row,
int mb_col);
void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd, void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
int mb_row, int mb_row,
int mb_col); int mb_col);

View File

@ -273,7 +273,8 @@ void vp9_recon_intra_mbuv(MACROBLOCKD *xd) {
int i; int i;
for (i = 16; i < 24; i += 2) { for (i = 16; i < 24; i += 2) {
BLOCKD *b = &xd->block[i]; BLOCKD *b = &xd->block[i];
vp9_recon2b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); vp9_recon2b(*(b->base_dst) + b->dst, b->diff,
*(b->base_dst) + b->dst, b->dst_stride);
} }
} }
@ -758,40 +759,6 @@ void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd,
xd->left_available, xd->right_available); xd->left_available, xd->right_available);
} }
// TODO(jingning): merge mby and mbuv into the above sby and sbuv functions
void vp9_build_intra_predictors_mby(MACROBLOCKD *xd) {
vp9_build_intra_predictors(xd->dst.y_buffer, xd->dst.y_stride,
xd->predictor, 16,
xd->mode_info_context->mbmi.mode,
16, 16,
xd->up_available, xd->left_available,
xd->right_available);
}
void vp9_build_intra_predictors_mbuv_internal(MACROBLOCKD *xd,
uint8_t *upred_ptr,
uint8_t *vpred_ptr,
int uv_stride,
int mode, int bsize) {
vp9_build_intra_predictors(xd->dst.u_buffer, xd->dst.uv_stride,
upred_ptr, uv_stride, mode,
bsize, bsize,
xd->up_available, xd->left_available,
xd->right_available);
vp9_build_intra_predictors(xd->dst.v_buffer, xd->dst.uv_stride,
vpred_ptr, uv_stride, mode,
bsize, bsize,
xd->up_available, xd->left_available,
xd->right_available);
}
void vp9_build_intra_predictors_mbuv(MACROBLOCKD *xd) {
vp9_build_intra_predictors_mbuv_internal(xd, &xd->predictor[256],
&xd->predictor[320], 8,
xd->mode_info_context->mbmi.uv_mode,
8);
}
void vp9_intra8x8_predict(MACROBLOCKD *xd, void vp9_intra8x8_predict(MACROBLOCKD *xd,
BLOCKD *b, BLOCKD *b,
int mode, int mode,

View File

@ -68,11 +68,15 @@ specialize vp9_recon_b
prototype void vp9_recon_uv_b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride" prototype void vp9_recon_uv_b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride"
specialize vp9_recon_uv_b specialize vp9_recon_uv_b
# TODO(jingning): The prototype functions in c are modified to enable block-size configurable
# operations. Need to change the sse2 accordingly.
prototype void vp9_recon2b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride" prototype void vp9_recon2b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride"
specialize vp9_recon2b sse2 specialize vp9_recon2b
# specialize vp9_recon2b sse2
prototype void vp9_recon4b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride" prototype void vp9_recon4b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride"
specialize vp9_recon4b sse2 specialize vp9_recon4b
# specialize vp9_recon4b sse2
prototype void vp9_recon_mb "struct macroblockd *x" prototype void vp9_recon_mb "struct macroblockd *x"
specialize vp9_recon_mb specialize vp9_recon_mb
@ -86,17 +90,14 @@ specialize vp9_recon_sby_s
prototype void vp9_recon_sbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst, enum BLOCK_SIZE_TYPE bsize" prototype void vp9_recon_sbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst, enum BLOCK_SIZE_TYPE bsize"
specialize void vp9_recon_sbuv_s specialize void vp9_recon_sbuv_s
prototype void vp9_build_intra_predictors "uint8_t *src, int src_stride, uint8_t *pred, int y_stride, int mode, int bw, int bh, int up_available, int left_available, int right_available"
specialize void vp9_build_intra_predictors
prototype void vp9_build_intra_predictors_sby_s "struct macroblockd *x, enum BLOCK_SIZE_TYPE bsize" prototype void vp9_build_intra_predictors_sby_s "struct macroblockd *x, enum BLOCK_SIZE_TYPE bsize"
specialize vp9_build_intra_predictors_sby_s; specialize vp9_build_intra_predictors_sby_s
prototype void vp9_build_intra_predictors_sbuv_s "struct macroblockd *x, enum BLOCK_SIZE_TYPE bsize" prototype void vp9_build_intra_predictors_sbuv_s "struct macroblockd *x, enum BLOCK_SIZE_TYPE bsize"
specialize vp9_build_intra_predictors_sbuv_s; specialize vp9_build_intra_predictors_sbuv_s
prototype void vp9_build_intra_predictors_mby "struct macroblockd *x"
specialize vp9_build_intra_predictors_mby;
prototype void vp9_build_intra_predictors_mbuv "struct macroblockd *x"
specialize vp9_build_intra_predictors_mbuv;
prototype void vp9_intra4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor, int pre_stride" prototype void vp9_intra4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor, int pre_stride"
specialize vp9_intra4x4_predict; specialize vp9_intra4x4_predict;
@ -620,16 +621,10 @@ specialize vp9_block_error mmx sse2
vp9_block_error_sse2=vp9_block_error_xmm vp9_block_error_sse2=vp9_block_error_xmm
prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch" prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
specialize vp9_subtract_b mmx sse2 # TODO(jingning): The prototype function in c has been changed to remove
# the use of predictor buffer in MACROBLOCKD. Need to modify the mmx and sse2
prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch" # versions accordingly.
specialize vp9_subtract_b mmx sse2 specialize vp9_subtract_b
prototype void vp9_subtract_mby "int16_t *diff, uint8_t *src, uint8_t *pred, int stride"
specialize vp9_subtract_mby mmx sse2
prototype void vp9_subtract_mbuv "int16_t *diff, uint8_t *usrc, uint8_t *vsrc, uint8_t *pred, int stride"
specialize vp9_subtract_mbuv mmx sse2
# #
# Structured Similarity (SSIM) # Structured Similarity (SSIM)

View File

@ -73,15 +73,15 @@ static void build_intra_predictors_mbuv_x86(MACROBLOCKD *xd,
} }
void vp9_build_intra_predictors_mbuv_sse2(MACROBLOCKD *xd) { void vp9_build_intra_predictors_mbuv_sse2(MACROBLOCKD *xd) {
build_intra_predictors_mbuv_x86(xd, &xd->predictor[256], build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer,
&xd->predictor[320], 8, xd->dst.v_buffer, xd->dst.uv_stride,
vp9_intra_pred_uv_tm_sse2, vp9_intra_pred_uv_tm_sse2,
vp9_intra_pred_uv_ho_mmx2); vp9_intra_pred_uv_ho_mmx2);
} }
void vp9_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *xd) { void vp9_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *xd) {
build_intra_predictors_mbuv_x86(xd, &xd->predictor[256], build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer,
&xd->predictor[320], 8, xd->dst.v_buffer, xd->dst.uv_stride,
vp9_intra_pred_uv_tm_ssse3, vp9_intra_pred_uv_tm_ssse3,
vp9_intra_pred_uv_ho_ssse3); vp9_intra_pred_uv_ho_ssse3);
} }

View File

@ -645,7 +645,7 @@ static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->mode_info_context->mbmi.mode, tx_size, xd->mode_info_context->mbmi.mode, tx_size,
xd->mode_info_context->mbmi.interp_filter); xd->mode_info_context->mbmi.interp_filter);
#endif #endif
vp9_build_inter_predictors_mb_s(xd, mb_row, mb_col); vp9_build_inter_predictors_mb(xd, mb_row, mb_col);
} }
if (xd->mode_info_context->mbmi.mb_skip_coeff) { if (xd->mode_info_context->mbmi.mb_skip_coeff) {

View File

@ -1405,9 +1405,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
int totalrate; int totalrate;
// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", // fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
// cpi->common.current_video_frame, cpi->common.show_frame, // cpi->common.current_video_frame, cpi->common.show_frame,
// cm->frame_type); // cm->frame_type);
// Compute a modified set of reference frame probabilities to use when // Compute a modified set of reference frame probabilities to use when
// prediction fails. These are based on the current general estimates for // prediction fails. These are based on the current general estimates for
@ -2230,15 +2230,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
if (!x->skip) { if (!x->skip) {
vp9_encode_inter16x16(cm, x, mb_row, mb_col); vp9_encode_inter16x16(cm, x, mb_row, mb_col);
} else { } else {
vp9_build_inter16x16_predictors_mb(xd, vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16);
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
xd->dst.uv_stride,
mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED #if CONFIG_COMP_INTERINTRA_PRED
if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_16x16_predictors_mb(xd, vp9_build_interintra_16x16_predictors_mb(xd,

View File

@ -52,7 +52,8 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib) {
b->bmi.as_mode.context = vp9_find_bpred_context(&x->e_mbd, b); b->bmi.as_mode.context = vp9_find_bpred_context(&x->e_mbd, b);
#endif #endif
vp9_intra4x4_predict(&x->e_mbd, b, b->bmi.as_mode.first, b->predictor, 16); vp9_intra4x4_predict(&x->e_mbd, b, b->bmi.as_mode.first,
*(b->base_dst) + b->dst, b->dst_stride);
vp9_subtract_b(be, b, 16); vp9_subtract_b(be, b, 16);
tx_type = get_tx_type_4x4(&x->e_mbd, ib); tx_type = get_tx_type_4x4(&x->e_mbd, ib);
@ -69,7 +70,8 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib) {
b->diff, 32); b->diff, 32);
} }
vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); vp9_recon_b(*(b->base_dst) + b->dst, b->diff,
*(b->base_dst) + b->dst, b->dst_stride);
} }
void vp9_encode_intra4x4mby(MACROBLOCK *mb) { void vp9_encode_intra4x4mby(MACROBLOCK *mb) {
@ -81,12 +83,13 @@ void vp9_encode_intra4x4mby(MACROBLOCK *mb) {
void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) { void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd; MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
vp9_build_intra_predictors_mby(xd); vp9_build_intra_predictors_sby_s(xd, BLOCK_SIZE_MB16X16);
vp9_subtract_sby_s_c(x->src_diff,
vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); x->src.y_buffer, x->src.y_stride,
xd->dst.y_buffer, xd->dst.y_stride,
BLOCK_SIZE_MB16X16);
switch (tx_size) { switch (tx_size) {
case TX_16X16: case TX_16X16:
@ -119,10 +122,11 @@ void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd; MACROBLOCKD *xd = &x->e_mbd;
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
vp9_build_intra_predictors_mbuv(xd); vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
vp9_subtract_sbuv_s_c(x->src_diff,
vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
xd->predictor, x->src.uv_stride); xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride,
BLOCK_SIZE_MB16X16);
switch (tx_size) { switch (tx_size) {
case TX_4X4: case TX_4X4:
@ -152,7 +156,8 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
int i; int i;
TX_TYPE tx_type; TX_TYPE tx_type;
vp9_intra8x8_predict(xd, b, b->bmi.as_mode.first, b->predictor, 16); vp9_intra8x8_predict(xd, b, b->bmi.as_mode.first,
*(b->base_dst) + b->dst, b->dst_stride);
// generate residual blocks // generate residual blocks
vp9_subtract_4b_c(be, b, 16); vp9_subtract_4b_c(be, b, 16);
@ -206,7 +211,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
// reconstruct submacroblock // reconstruct submacroblock
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
b = &xd->block[ib + iblock[i]]; b = &xd->block[ib + iblock[i]];
vp9_recon_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, vp9_recon_b_c(*(b->base_dst) + b->dst, b->diff, *(b->base_dst) + b->dst,
b->dst_stride); b->dst_stride);
} }
} }
@ -227,7 +232,8 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) {
const int block = ib < 20 ? ib - 16 : ib - 20; const int block = ib < 20 ? ib - 16 : ib - 20;
assert(ib >= 16 && ib < 24); assert(ib >= 16 && ib < 24);
vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, b->predictor, 8); vp9_intra_uv4x4_predict(&x->e_mbd, b, mode,
*(b->base_dst) + b->dst, b->dst_stride);
vp9_subtract_b(be, b, 8); vp9_subtract_b(be, b, 8);
@ -236,7 +242,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) {
vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[plane].eobs[block], vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[plane].eobs[block],
dqcoeff, b->diff, 16); dqcoeff, b->diff, 16);
vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, vp9_recon_uv_b_c(*(b->base_dst) + b->dst, b->diff, *(b->base_dst) + b->dst,
b->dst_stride); b->dst_stride);
} }

View File

@ -23,8 +23,9 @@
void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) { void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) {
uint8_t *src_ptr = (*(be->base_src) + be->src); uint8_t *src_ptr = (*(be->base_src) + be->src);
int16_t *diff_ptr = be->src_diff; int16_t *diff_ptr = be->src_diff;
uint8_t *pred_ptr = bd->predictor; uint8_t *pred_ptr = *(bd->base_dst) + bd->dst;
int src_stride = be->src_stride; int src_stride = be->src_stride;
int dst_stride = bd->dst_stride;
int r, c; int r, c;
@ -33,7 +34,7 @@ void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) {
diff_ptr[c] = src_ptr[c] - pred_ptr[c]; diff_ptr[c] = src_ptr[c] - pred_ptr[c];
diff_ptr += pitch; diff_ptr += pitch;
pred_ptr += pitch; pred_ptr += dst_stride;
src_ptr += src_stride; src_ptr += src_stride;
} }
} }
@ -41,8 +42,9 @@ void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) {
void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) { void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) {
uint8_t *src_ptr = (*(be->base_src) + be->src); uint8_t *src_ptr = (*(be->base_src) + be->src);
int16_t *diff_ptr = be->src_diff; int16_t *diff_ptr = be->src_diff;
uint8_t *pred_ptr = bd->predictor; uint8_t *pred_ptr = *(bd->base_dst) + bd->dst;
int src_stride = be->src_stride; int src_stride = be->src_stride;
int dst_stride = bd->dst_stride;
int r, c; int r, c;
for (r = 0; r < 8; r++) { for (r = 0; r < 8; r++) {
@ -50,7 +52,7 @@ void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) {
diff_ptr[c] = src_ptr[c] - pred_ptr[c]; diff_ptr[c] = src_ptr[c] - pred_ptr[c];
diff_ptr += pitch; diff_ptr += pitch;
pred_ptr += pitch; pred_ptr += dst_stride;
src_ptr += src_stride; src_ptr += src_stride;
} }
} }
@ -102,25 +104,15 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc,
} }
} }
void vp9_subtract_mby_c(int16_t *diff, uint8_t *src,
uint8_t *pred, int stride) {
vp9_subtract_sby_s_c(diff, src, stride, pred, 16, BLOCK_SIZE_MB16X16);
}
void vp9_subtract_mbuv_c(int16_t *diff, uint8_t *usrc,
uint8_t *vsrc, uint8_t *pred, int stride) {
uint8_t *upred = pred + 256;
uint8_t *vpred = pred + 320;
vp9_subtract_sbuv_s_c(diff, usrc, vsrc, stride, upred, vpred, 8,
BLOCK_SIZE_MB16X16);
}
static void subtract_mb(MACROBLOCK *x) { static void subtract_mb(MACROBLOCK *x) {
vp9_subtract_mby(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, MACROBLOCKD *xd = &x->e_mbd;
x->src.y_stride); vp9_subtract_sby_s_c(x->src_diff, x->src.y_buffer, x->src.y_stride,
vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, xd->dst.y_buffer, xd->dst.y_stride,
x->e_mbd.predictor, x->src.uv_stride); BLOCK_SIZE_MB16X16);
vp9_subtract_sbuv_s_c(x->src_diff, x->src.u_buffer, x->src.v_buffer,
x->src.uv_stride,
xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride,
BLOCK_SIZE_MB16X16);
} }
void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
@ -920,11 +912,12 @@ void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x,
/* this function is used by first pass only */ /* this function is used by first pass only */
void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) { void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) {
MACROBLOCKD *xd = &x->e_mbd; MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col); vp9_build_inter_predictors_sby(xd, xd->dst.y_buffer, xd->dst.y_stride,
mb_row, mb_col, BLOCK_SIZE_MB16X16);
vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); vp9_subtract_sby_s_c(x->src_diff, x->src.y_buffer, x->src.y_stride,
xd->dst.y_buffer, xd->dst.y_stride,
BLOCK_SIZE_MB16X16);
vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16); vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16);
vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16);

View File

@ -58,13 +58,6 @@ void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch); void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch);
void vp9_subtract_mbuv_s_c(int16_t *diff, const uint8_t *usrc,
const uint8_t *vsrc, int src_stride,
const uint8_t *upred,
const uint8_t *vpred, int dst_stride);
void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src,
int src_stride, const uint8_t *pred,
int dst_stride);
void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride, void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
const uint8_t *pred, int dst_stride, const uint8_t *pred, int dst_stride,
BLOCK_SIZE_TYPE bsize); BLOCK_SIZE_TYPE bsize);

View File

@ -71,9 +71,10 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
} }
vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv); vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col); vp9_build_inter_predictors_sby(xd, xd->dst.y_buffer, xd->dst.y_stride,
best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride, mb_row, mb_col, BLOCK_SIZE_MB16X16);
xd->predictor, 16, INT_MAX); best_err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride,
xd->dst.y_buffer, xd->dst.y_stride, INT_MAX);
/* restore UMV window */ /* restore UMV window */
x->mv_col_min = tmp_col_min; x->mv_col_min = tmp_col_min;
@ -105,21 +106,19 @@ static int do_16x16_motion_search
BLOCKD *d = &xd->block[n]; BLOCKD *d = &xd->block[n];
BLOCK *b = &x->block[n]; BLOCK *b = &x->block[n];
b->base_src = &buf->y_buffer; b->base_src = &x->src.y_buffer;
b->src_stride = buf->y_stride; b->src_stride = x->src.y_stride;
b->src = buf->y_stride * (n & 12) + (n & 3) * 4 + buf_mb_y_offset; b->src = x->src.y_stride * (n & 12) + (n & 3) * 4;
d->base_pre = &ref->y_buffer; d->base_pre = &xd->pre.y_buffer;
d->pre_stride = ref->y_stride; d->pre_stride = xd->pre.y_stride;
d->pre = ref->y_stride * (n & 12) + (n & 3) * 4 + mb_y_offset; d->pre = xd->pre.y_stride * (n & 12) + (n & 3) * 4;
} }
// Try zero MV first // Try zero MV first
// FIXME should really use something like near/nearest MV and/or MV prediction // FIXME should really use something like near/nearest MV and/or MV prediction
xd->pre.y_buffer = ref->y_buffer + mb_y_offset; err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride,
xd->pre.y_stride = ref->y_stride; xd->pre.y_buffer, xd->pre.y_stride, INT_MAX);
err = vp9_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride,
xd->dst.y_buffer, xd->dst.y_stride, INT_MAX);
dst_mv->as_int = 0; dst_mv->as_int = 0;
// Test last reference frame using the previous best mv as the // Test last reference frame using the previous best mv as the
@ -159,27 +158,11 @@ static int do_16x16_zerozero_search
MACROBLOCK *const x = &cpi->mb; MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
unsigned int err; unsigned int err;
int n;
for (n = 0; n < 16; n++) {
BLOCKD *d = &xd->block[n];
BLOCK *b = &x->block[n];
b->base_src = &buf->y_buffer;
b->src_stride = buf->y_stride;
b->src = buf->y_stride * (n & 12) + (n & 3) * 4 + buf_mb_y_offset;
d->base_pre = &ref->y_buffer;
d->pre_stride = ref->y_stride;
d->pre = ref->y_stride * (n & 12) + (n & 3) * 4 + mb_y_offset;
}
// Try zero MV first // Try zero MV first
// FIXME should really use something like near/nearest MV and/or MV prediction // FIXME should really use something like near/nearest MV and/or MV prediction
xd->pre.y_buffer = ref->y_buffer + mb_y_offset; err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride,
xd->pre.y_stride = ref->y_stride; xd->pre.y_buffer, xd->pre.y_stride, INT_MAX);
err = vp9_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride,
xd->dst.y_buffer, xd->dst.y_stride, INT_MAX);
dst_mv->as_int = 0; dst_mv->as_int = 0;
@ -201,11 +184,19 @@ static int find_best_16x16_intra
// we're intentionally not doing 4x4, we just want a rough estimate // we're intentionally not doing 4x4, we just want a rough estimate
for (mode = DC_PRED; mode <= TM_PRED; mode++) { for (mode = DC_PRED; mode <= TM_PRED; mode++) {
unsigned int err; unsigned int err;
const int bwl = b_width_log2(BLOCK_SIZE_MB16X16), bw = 4 << bwl;
const int bhl = b_height_log2(BLOCK_SIZE_MB16X16), bh = 4 << bhl;
xd->mode_info_context->mbmi.mode = mode; xd->mode_info_context->mbmi.mode = mode;
vp9_build_intra_predictors_mby(xd); vp9_build_intra_predictors(x->src.y_buffer, x->src.y_stride,
err = vp9_sad16x16(xd->predictor, 16, buf->y_buffer + mb_y_offset, xd->dst.y_buffer, xd->dst.y_stride,
buf->y_stride, best_err); xd->mode_info_context->mbmi.mode,
bw, bh,
xd->up_available, xd->left_available,
xd->right_available);
err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride,
xd->dst.y_buffer, xd->dst.y_stride, best_err);
// find best // find best
if (err < best_err) { if (err < best_err) {
best_err = err; best_err = err;
@ -237,23 +228,32 @@ static void update_mbgraph_mb_stats
MACROBLOCK *const x = &cpi->mb; MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
int intra_error; int intra_error;
VP9_COMMON *cm = &cpi->common;
// FIXME in practice we're completely ignoring chroma here // FIXME in practice we're completely ignoring chroma here
xd->dst.y_buffer = buf->y_buffer + mb_y_offset; x->src.y_buffer = buf->y_buffer + mb_y_offset;
x->src.y_stride = buf->y_stride;
xd->dst.y_buffer = cm->yv12_fb[cm->new_fb_idx].y_buffer + mb_y_offset;
xd->dst.y_stride = cm->yv12_fb[cm->new_fb_idx].y_stride;
// do intra 16x16 prediction // do intra 16x16 prediction
intra_error = find_best_16x16_intra(cpi, buf, mb_y_offset, &stats->ref[INTRA_FRAME].m.mode); intra_error = find_best_16x16_intra(cpi, buf, mb_y_offset,
&stats->ref[INTRA_FRAME].m.mode);
if (intra_error <= 0) if (intra_error <= 0)
intra_error = 1; intra_error = 1;
stats->ref[INTRA_FRAME].err = intra_error; stats->ref[INTRA_FRAME].err = intra_error;
// Golden frame MV search, if it exists and is different than last frame // Golden frame MV search, if it exists and is different than last frame
if (golden_ref) { if (golden_ref) {
int g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv, int g_motion_error;
&stats->ref[GOLDEN_FRAME].m.mv, xd->pre.y_buffer = golden_ref->y_buffer + mb_y_offset;
buf, mb_y_offset, xd->pre.y_stride = golden_ref->y_stride;
golden_ref, gld_y_offset, g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv,
mb_row, mb_col); &stats->ref[GOLDEN_FRAME].m.mv,
buf, mb_y_offset,
golden_ref, gld_y_offset,
mb_row, mb_col);
stats->ref[GOLDEN_FRAME].err = g_motion_error; stats->ref[GOLDEN_FRAME].err = g_motion_error;
} else { } else {
stats->ref[GOLDEN_FRAME].err = INT_MAX; stats->ref[GOLDEN_FRAME].err = INT_MAX;
@ -262,16 +262,13 @@ static void update_mbgraph_mb_stats
// Alt-ref frame MV search, if it exists and is different than last/golden frame // Alt-ref frame MV search, if it exists and is different than last/golden frame
if (alt_ref) { if (alt_ref) {
// int a_motion_error = do_16x16_motion_search(cpi, prev_alt_ref_mv, int a_motion_error;
// &stats->ref[ALTREF_FRAME].m.mv, xd->pre.y_buffer = alt_ref->y_buffer + mb_y_offset;
// buf, mb_y_offset, xd->pre.y_stride = alt_ref->y_stride;
// alt_ref, arf_y_offset); a_motion_error = do_16x16_zerozero_search(cpi,
&stats->ref[ALTREF_FRAME].m.mv,
int a_motion_error = buf, mb_y_offset,
do_16x16_zerozero_search(cpi, alt_ref, arf_y_offset);
&stats->ref[ALTREF_FRAME].m.mv,
buf, mb_y_offset,
alt_ref, arf_y_offset);
stats->ref[ALTREF_FRAME].err = a_motion_error; stats->ref[ALTREF_FRAME].err = a_motion_error;
} else { } else {

View File

@ -638,15 +638,6 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
rd[TX_4X4][1] : rd[TX_8X8][1]; rd[TX_4X4][1] : rd[TX_8X8][1];
} }
static void copy_predictor(uint8_t *dst, const uint8_t *predictor) {
const unsigned int *p = (const unsigned int *)predictor;
unsigned int *d = (unsigned int *)dst;
d[0] = p[0];
d[4] = p[4];
d[8] = p[8];
d[12] = p[12];
}
static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
int block_size, int shift) { int block_size, int shift) {
int i; int i;
@ -849,13 +840,7 @@ static void super_block_yrd(VP9_COMP *cpi,
uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
// FIXME(rbultje): mb code still predicts into xd->predictor vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride, bs);
if (bs == BLOCK_SIZE_MB16X16) {
vp9_subtract_mby(x->src_diff, src, xd->predictor, src_y_stride);
} else {
vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride,
bs);
}
if (bs >= BLOCK_SIZE_SB32X32) if (bs >= BLOCK_SIZE_SB32X32)
super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
@ -892,7 +877,6 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
* a temp buffer that meets the stride requirements, but we are only * a temp buffer that meets the stride requirements, but we are only
* interested in the left 4x4 block * interested in the left 4x4 block
* */ * */
DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 4);
DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);
assert(ib < 16); assert(ib < 16);
@ -922,7 +906,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
rate = bmode_costs[mode]; rate = bmode_costs[mode];
#endif #endif
vp9_intra4x4_predict(xd, b, mode, b->predictor, 16); vp9_intra4x4_predict(xd, b, mode, *(b->base_dst) + b->dst, b->dst_stride);
vp9_subtract_b(be, b, 16); vp9_subtract_b(be, b, 16);
b->bmi.as_mode.first = mode; b->bmi.as_mode.first = mode;
@ -956,7 +940,6 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
best_tx_type = tx_type; best_tx_type = tx_type;
*a = tempa; *a = tempa;
*l = templ; *l = templ;
copy_predictor(best_predictor, b->predictor);
vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32); vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32);
} }
} }
@ -968,7 +951,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
else else
xd->inv_txm4x4(best_dqcoeff, b->diff, 32); xd->inv_txm4x4(best_dqcoeff, b->diff, 32);
vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); vp9_intra4x4_predict(xd, b, *best_mode,
*(b->base_dst) + b->dst, b->dst_stride);
vp9_recon_b(*(b->base_dst) + b->dst, b->diff,
*(b->base_dst) + b->dst, b->dst_stride);
return best_rd; return best_rd;
} }
@ -1063,11 +1049,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t local_txfm_cache[NB_TXFM_MODES]; int64_t local_txfm_cache[NB_TXFM_MODES];
x->e_mbd.mode_info_context->mbmi.mode = mode; x->e_mbd.mode_info_context->mbmi.mode = mode;
if (bsize == BLOCK_SIZE_MB16X16) { vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize);
vp9_build_intra_predictors_mby(&x->e_mbd);
} else {
vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize);
}
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
bsize, local_txfm_cache); bsize, local_txfm_cache);
@ -1129,7 +1111,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
rate = mode_costs[mode]; rate = mode_costs[mode];
b->bmi.as_mode.first = mode; b->bmi.as_mode.first = mode;
vp9_intra8x8_predict(xd, b, mode, b->predictor, 16); vp9_intra8x8_predict(xd, b, mode, *(b->base_dst) + b->dst, b->dst_stride);
vp9_subtract_4b_c(be, b, 16); vp9_subtract_4b_c(be, b, 16);
@ -1543,14 +1525,8 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
// FIXME(rbultje): mb code still predicts into xd->predictor vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride,
if (bsize == BLOCK_SIZE_MB16X16) { udst, vdst, dst_uv_stride, bsize);
vp9_subtract_mbuv(x->src_diff, usrc, vsrc, xd->predictor,
x->src.uv_stride);
} else {
vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride,
udst, vdst, dst_uv_stride, bsize);
}
if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) { if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) {
super_block_uvrd_32x32(cm, x, rate, distortion, skippable, bsize); super_block_uvrd_32x32(cm, x, rate, distortion, skippable, bsize);
@ -1576,10 +1552,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (mode = DC_PRED; mode <= TM_PRED; mode++) { for (mode = DC_PRED; mode <= TM_PRED; mode++) {
x->e_mbd.mode_info_context->mbmi.uv_mode = mode; x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
if (bsize == BLOCK_SIZE_MB16X16) vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize);
vp9_build_intra_predictors_mbuv(&x->e_mbd);
else
vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize);
super_block_uvrd(&cpi->common, x, &this_rate_tokenonly, super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
&this_distortion, &s, bsize); &this_distortion, &s, bsize);
@ -1759,7 +1732,8 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
vp9_build_inter_predictor(*(bd->base_pre) + bd->pre, vp9_build_inter_predictor(*(bd->base_pre) + bd->pre,
bd->pre_stride, bd->pre_stride,
bd->predictor, 16, *(bd->base_dst) + bd->dst,
bd->dst_stride,
&bd->bmi.as_mv[0], &bd->bmi.as_mv[0],
&xd->scale_factor[0], &xd->scale_factor[0],
4, 4, 0 /* no avg */, &xd->subpix); 4, 4, 0 /* no avg */, &xd->subpix);
@ -1769,7 +1743,8 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
// weighting for splitmv modes is turned on. // weighting for splitmv modes is turned on.
if (xd->mode_info_context->mbmi.second_ref_frame > 0) { if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
vp9_build_inter_predictor( vp9_build_inter_predictor(
*(bd->base_second_pre) + bd->pre, bd->pre_stride, bd->predictor, 16, *(bd->base_second_pre) + bd->pre, bd->pre_stride,
*(bd->base_dst) + bd->dst, bd->dst_stride,
&bd->bmi.as_mv[1], &xd->scale_factor[1], 4, 4, &bd->bmi.as_mv[1], &xd->scale_factor[1], 4, 4,
1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT) /* avg */, 1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT) /* avg */,
&xd->subpix); &xd->subpix);
@ -1834,7 +1809,8 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
// implicit-compoundinter-weight experiment when implicit // implicit-compoundinter-weight experiment when implicit
// weighting for splitmv modes is turned on. // weighting for splitmv modes is turned on.
vp9_build_inter_predictor( vp9_build_inter_predictor(
*base_pre + bd->pre, bd->pre_stride, bd->predictor, 16, *base_pre + bd->pre, bd->pre_stride,
*(bd->base_dst) + bd->dst, bd->dst_stride,
&bd->bmi.as_mv[which_mv], &xd->scale_factor[which_mv], 8, 8, &bd->bmi.as_mv[which_mv], &xd->scale_factor[which_mv], 8, 8,
which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT),
&xd->subpix); &xd->subpix);
@ -3144,23 +3120,20 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
unsigned int sse, var; unsigned int sse, var;
int tmp_rate_y, tmp_rate_u, tmp_rate_v; int tmp_rate_y, tmp_rate_u, tmp_rate_v;
int tmp_dist_y, tmp_dist_u, tmp_dist_v; int tmp_dist_y, tmp_dist_u, tmp_dist_v;
vp9_build_inter16x16_predictors_mb(xd, xd->predictor, vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16);
xd->predictor + 256,
xd->predictor + 320,
16, 8, mb_row, mb_col);
var = vp9_variance16x16(*(b->base_src), b->src_stride, var = vp9_variance16x16(*(b->base_src), b->src_stride,
xd->predictor, 16, &sse); xd->dst.y_buffer, xd->dst.y_stride, &sse);
// Note our transform coeffs are 8 times an orthogonal transform. // Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer // Hence quantizer step is also 8 times. To get effective quantizer
// we need to divide by 8 before sending to modeling function. // we need to divide by 8 before sending to modeling function.
model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3, model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3,
&tmp_rate_y, &tmp_dist_y); &tmp_rate_y, &tmp_dist_y);
var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride, var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride,
&xd->predictor[256], 8, &sse); xd->dst.u_buffer, xd->dst.uv_stride, &sse);
model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3, model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3,
&tmp_rate_u, &tmp_dist_u); &tmp_rate_u, &tmp_dist_u);
var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride, var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride,
&xd->predictor[320], 8, &sse); xd->dst.v_buffer, xd->dst.uv_stride, &sse);
model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3, model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3,
&tmp_rate_v, &tmp_dist_v); &tmp_rate_v, &tmp_dist_v);
rd = RDCOST(x->rdmult, x->rddiv, rd = RDCOST(x->rdmult, x->rddiv,
@ -3184,9 +3157,19 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
(cm->mcomp_filter_type != SWITCHABLE && (cm->mcomp_filter_type != SWITCHABLE &&
cm->mcomp_filter_type == mbmi->interp_filter)) { cm->mcomp_filter_type == mbmi->interp_filter)) {
vpx_memcpy(tmp_ybuf, xd->predictor, sizeof(unsigned char) * 256); int i;
vpx_memcpy(tmp_ubuf, xd->predictor + 256, sizeof(unsigned char) * 64); for (i = 0; i < 16 * bh; ++i)
vpx_memcpy(tmp_vbuf, xd->predictor + 320, sizeof(unsigned char) * 64); vpx_memcpy(tmp_ybuf + i * 16 * bw,
xd->dst.y_buffer + i * xd->dst.y_stride,
sizeof(unsigned char) * 16 * bw);
for (i = 0; i < 8 * bh; ++i)
vpx_memcpy(tmp_ubuf + i * 8 * bw,
xd->dst.u_buffer + i * xd->dst.uv_stride,
sizeof(unsigned char) * 8 * bw);
for (i = 0; i < 8 * bh; ++i)
vpx_memcpy(tmp_vbuf + i * 8 * bw,
xd->dst.v_buffer + i * xd->dst.uv_stride,
sizeof(unsigned char) * 8 * bw);
pred_exists = 1; pred_exists = 1;
} }
interpolating_intpel_seen |= interpolating_intpel_seen |=
@ -3203,32 +3186,19 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (pred_exists) { if (pred_exists) {
// FIXME(rbultje): mb code still predicts into xd->predictor // FIXME(rbultje): mb code still predicts into xd->predictor
if (bsize != BLOCK_SIZE_MB16X16) { for (i = 0; i < bh * 16; ++i)
for (i = 0; i < bh * 16; ++i) vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride,
vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * bw * 16, sizeof(unsigned char) * bw * 16);
tmp_ybuf + i * bw * 16, sizeof(unsigned char) * bw * 16); for (i = 0; i < bh * 8; ++i)
for (i = 0; i < bh * 8; ++i) vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride,
vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * bw * 8, sizeof(unsigned char) * bw * 8);
tmp_ubuf + i * bw * 8, sizeof(unsigned char) * bw * 8); for (i = 0; i < bh * 8; ++i)
for (i = 0; i < bh * 8; ++i) vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride,
vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * bw * 8, sizeof(unsigned char) * bw * 8);
tmp_vbuf + i * bw * 8, sizeof(unsigned char) * bw * 8);
} else {
vpx_memcpy(xd->predictor, tmp_ybuf, sizeof(unsigned char) * 256);
vpx_memcpy(xd->predictor + 256, tmp_ubuf, sizeof(unsigned char) * 64);
vpx_memcpy(xd->predictor + 320, tmp_vbuf, sizeof(unsigned char) * 64);
}
} else { } else {
// Handles the special case when a filter that is not in the // Handles the special case when a filter that is not in the
// switchable list (ex. bilinear, 6-tap) is indicated at the frame level // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
if (bsize > BLOCK_SIZE_MB16X16) { vp9_build_inter_predictors_sb(xd, mb_row, mb_col, bsize);
vp9_build_inter_predictors_sb(xd, mb_row, mb_col, bsize);
} else {
vp9_build_inter16x16_predictors_mb(xd, xd->predictor,
xd->predictor + 256,
xd->predictor + 320,
16, 8, mb_row, mb_col);
}
} }
if (cpi->common.mcomp_filter_type == SWITCHABLE) { if (cpi->common.mcomp_filter_type == SWITCHABLE) {
@ -3253,7 +3223,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
&sse); &sse);
} else { } else {
var = vp9_variance16x16(*(b->base_src), b->src_stride, var = vp9_variance16x16(*(b->base_src), b->src_stride,
xd->predictor, 16, &sse); xd->dst.y_buffer, xd->dst.y_stride, &sse);
} }
if ((int)sse < threshold) { if ((int)sse < threshold) {
@ -3278,9 +3248,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
} else { } else {
unsigned int sse2u, sse2v; unsigned int sse2u, sse2v;
var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride, var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride,
xd->predictor + 256, 8, &sse2u); xd->dst.u_buffer, xd->dst.uv_stride, &sse2u);
var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride, var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride,
xd->predictor + 320, 8, &sse2v); xd->dst.v_buffer, xd->dst.uv_stride, &sse2v);
sse2 = sse2u + sse2v; sse2 = sse2u + sse2v;
} }
@ -3614,7 +3584,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
case TM_PRED: case TM_PRED:
mbmi->ref_frame = INTRA_FRAME; mbmi->ref_frame = INTRA_FRAME;
// FIXME compound intra prediction // FIXME compound intra prediction
vp9_build_intra_predictors_mby(&x->e_mbd); vp9_build_intra_predictors_sby_s(&x->e_mbd, BLOCK_SIZE_MB16X16);
// vp9_build_intra_predictors_mby(&x->e_mbd);
super_block_yrd(cpi, x, &rate_y, &distortion, &skippable, super_block_yrd(cpi, x, &rate_y, &distortion, &skippable,
BLOCK_SIZE_MB16X16, txfm_cache); BLOCK_SIZE_MB16X16, txfm_cache);
rate2 += rate_y; rate2 += rate_y;
@ -3790,8 +3761,14 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int uv_skippable; int uv_skippable;
vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col); vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col);
vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
x->e_mbd.predictor, x->src.uv_stride); vp9_subtract_sbuv_s_c(x->src_diff,
x->src.u_buffer,
x->src.v_buffer, x->src.uv_stride,
xd->dst.u_buffer,
xd->dst.v_buffer, xd->dst.uv_stride,
BLOCK_SIZE_MB16X16);
super_block_uvrd_4x4(cm, x, &rate_uv, &distortion_uv, super_block_uvrd_4x4(cm, x, &rate_uv, &distortion_uv,
&uv_skippable, BLOCK_SIZE_MB16X16); &uv_skippable, BLOCK_SIZE_MB16X16);
rate2 += rate_uv; rate2 += rate_uv;

View File

@ -30,7 +30,9 @@ void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) {
unsigned char *z = *(be->base_src) + be->src; unsigned char *z = *(be->base_src) + be->src;
unsigned int src_stride = be->src_stride; unsigned int src_stride = be->src_stride;
short *diff = &be->src_diff[0]; short *diff = &be->src_diff[0];
unsigned char *predictor = &bd->predictor[0]; unsigned char *predictor = *(bd->base_dst) + bd->dst;
// TODO(jingning): The prototype function in c has been changed. Need to
// modify the mmx and sse versions.
vp9_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch); vp9_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch);
} }
@ -44,7 +46,9 @@ void vp9_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) {
unsigned char *z = *(be->base_src) + be->src; unsigned char *z = *(be->base_src) + be->src;
unsigned int src_stride = be->src_stride; unsigned int src_stride = be->src_stride;
short *diff = &be->src_diff[0]; short *diff = &be->src_diff[0];
unsigned char *predictor = &bd->predictor[0]; unsigned char *predictor = *(bd->base_dst) + bd->dst;
// TODO(jingning): The prototype function in c has been changed. Need to
// modify the mmx and sse versions.
vp9_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch); vp9_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch);
} }