Make RD superblock mode search size-agnostic.

Merge the various super_block_yrd and super_block_uvrd versions into one
common function that works for all sizes, and make transform size
selection size-agnostic as well. This fixes a slight bug in the intra UV
superblock code, which used the wrong transform size for txsz > 8x8, and
it now stores the txsz selection for superblocks properly (instead of
forgetting it). Lastly, it removes the trellis search that was done for
16x16 intra predictors, since trellis is relatively expensive and should
thus only be done after RD mode selection.

Gives basically identical results on derf (+0.009%).

Change-Id: If4485c6f0a0fe4038b3172f7a238477c35a6f8d3
This commit is contained in:
Ronald S. Bultje 2013-04-10 15:55:59 -07:00
parent a4579e04c9
commit b4f6098ef7
6 changed files with 507 additions and 1469 deletions

View File

@ -583,9 +583,6 @@ specialize vp9_sub_pixel_mse32x32
prototype unsigned int vp9_get_mb_ss "const int16_t *"
specialize vp9_get_mb_ss mmx sse2
# ENCODEMB INVOKE
prototype int vp9_mbblock_error "struct macroblock *mb"
specialize vp9_mbblock_error mmx sse2
vp9_mbblock_error_sse2=vp9_mbblock_error_xmm
prototype int vp9_block_error "int16_t *coeff, int16_t *dqcoeff, int block_size"
specialize vp9_block_error mmx sse2
@ -594,9 +591,6 @@ vp9_block_error_sse2=vp9_block_error_xmm
prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
specialize vp9_subtract_b mmx sse2
prototype int vp9_mbuverror "struct macroblock *mb"
specialize vp9_mbuverror
prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
specialize vp9_subtract_b mmx sse2

View File

@ -840,15 +840,15 @@ static void pick_sb_modes(VP9_COMP *cpi,
/* Find best coding mode & reconstruct the MB so it is available
* as a predictor for MBs that follow in the SB */
if (cm->frame_type == KEY_FRAME) {
vp9_rd_pick_intra_mode_sb32(cpi, x,
totalrate,
totaldist);
vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist,
BLOCK_SIZE_SB32X32);
/* Save the coding context */
vpx_memcpy(&x->sb32_context[xd->sb_index].mic, xd->mode_info_context,
sizeof(MODE_INFO));
} else {
vp9_rd_pick_inter_mode_sb32(cpi, x, mb_row, mb_col, totalrate, totaldist);
vp9_rd_pick_inter_mode_sb(cpi, x, mb_row, mb_col, totalrate, totaldist,
BLOCK_SIZE_SB32X32);
}
}
@ -870,12 +870,14 @@ static void pick_sb64_modes(VP9_COMP *cpi,
/* Find best coding mode & reconstruct the MB so it is available
* as a predictor for MBs that follow in the SB */
if (cm->frame_type == KEY_FRAME) {
vp9_rd_pick_intra_mode_sb64(cpi, x, totalrate, totaldist);
vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist,
BLOCK_SIZE_SB64X64);
/* Save the coding context */
vpx_memcpy(&x->sb64_context.mic, xd->mode_info_context, sizeof(MODE_INFO));
} else {
vp9_rd_pick_inter_mode_sb64(cpi, x, mb_row, mb_col, totalrate, totaldist);
vp9_rd_pick_inter_mode_sb(cpi, x, mb_row, mb_col, totalrate, totaldist,
BLOCK_SIZE_SB64X64);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -22,23 +22,16 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex);
void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
int *r, int *d);
void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
int *r, int *d);
void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
int *r, int *d);
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int *r, int *d, BLOCK_SIZE_TYPE bsize);
void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
int mb_row, int mb_col,
int *r, int *d);
int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int mb_row, int mb_col,
int *r, int *d);
int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
int mb_row, int mb_col,
int *r, int *d);
int *r, int *d, BLOCK_SIZE_TYPE bsize);
void vp9_init_me_luts();

View File

@ -123,140 +123,3 @@ sym(vp9_block_error_mmx):
UNSHADOW_ARGS
pop rbp
ret
;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr);
;
; Sums the squared differences between the 16-bit values at coeff_ptr and
; dcoef_ptr. The loop runs 16 times and consumes 32 bytes (16 words) from
; each buffer per iteration, i.e. 256 words per buffer in total.
; The result is left in rax; the prototype above declares it as int.
global sym(vp9_mbblock_error_mmx_impl) PRIVATE
sym(vp9_mbblock_error_mmx_impl):
push rbp
mov rbp, rsp
; NOTE(review): 3 argument slots are shadowed although only arg(0) and
; arg(1) are read below - confirm whether this is intentional.
SHADOW_ARGS_TO_STACK 3
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;coeff_ptr
; mm7 is cleared here but not referenced again in this routine.
pxor mm7, mm7
mov rdi, arg(1) ;dcoef_ptr
; mm2 is the running accumulator (two 32-bit lanes).
pxor mm2, mm2
; loop counter: 16 iterations
mov rcx, 16
.mberror_loop_mmx:
; First 16 bytes of this iteration: two 8-byte loads from each buffer.
movq mm3, [rsi]
movq mm4, [rdi]
movq mm5, [rsi+8]
movq mm6, [rdi+8]
; psubw forms the per-word difference; pmaddwd of a register with itself
; squares each word and adds adjacent pairs into 32-bit lanes.
psubw mm5, mm6
pmaddwd mm5, mm5
psubw mm3, mm4
pmaddwd mm3, mm3
paddd mm2, mm5
paddd mm2, mm3
; Second 16 bytes of this iteration, handled the same way.
movq mm3, [rsi+16]
movq mm4, [rdi+16]
movq mm5, [rsi+24]
movq mm6, [rdi+24]
psubw mm5, mm6
pmaddwd mm5, mm5
psubw mm3, mm4
pmaddwd mm3, mm3
paddd mm2, mm5
paddd mm2, mm3
; Advance both pointers by the 32 bytes just consumed.
add rsi, 32
add rdi, 32
sub rcx, 1
jnz .mberror_loop_mmx
; Horizontal reduction: add the high 32-bit lane of the accumulator into
; the low lane. The low 32 bits of rax then hold the total; the upper
; 32 bits still carry the old high lane.
movq mm0, mm2
psrlq mm2, 32
paddd mm0, mm2
movq rax, mm0
pop rdi
pop rsi
; begin epilog
UNSHADOW_ARGS
pop rbp
ret
;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr);
;
; SSE2 routine that sums the squared differences between the 16-bit values
; at coeff_ptr and dcoef_ptr. The loop runs 16 times and consumes 32 bytes
; (16 words) from each buffer per iteration, i.e. 256 words per buffer.
; All loads use movdqa, so both pointers must be 16-byte aligned.
; The 32-bit total is returned zero-extended in rax.
global sym(vp9_mbblock_error_xmm_impl) PRIVATE
sym(vp9_mbblock_error_xmm_impl):
push rbp
mov rbp, rsp
; NOTE(review): 3 argument slots are shadowed although only arg(0) and
; arg(1) are read below - confirm whether this is intentional.
SHADOW_ARGS_TO_STACK 3
; SAVE_XMM / RESTORE_XMM are macros defined outside this listing;
; presumably they preserve callee-saved xmm registers - verify.
SAVE_XMM 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;coeff_ptr
; xmm5 stays zero and is used as the zero source for the unpacks below.
pxor xmm5, xmm5
mov rdi, arg(1) ;dcoef_ptr
; xmm4 is the running accumulator (four 32-bit lanes).
pxor xmm4, xmm4
; loop counter: 16 iterations
mov rcx, 16
.mberror_loop:
; Two 16-byte loads from each buffer per iteration.
movdqa xmm0, [rsi]
movdqa xmm1, [rdi]
movdqa xmm2, [rsi+16]
movdqa xmm3, [rdi+16]
; psubw forms the per-word difference; pmaddwd of a register with itself
; squares each word and adds adjacent pairs into 32-bit lanes.
psubw xmm2, xmm3
pmaddwd xmm2, xmm2
psubw xmm0, xmm1
pmaddwd xmm0, xmm0
add rsi, 32
add rdi, 32
; The flags set by this sub are consumed by the jnz below; the paddd
; instructions in between do not modify the flags.
sub rcx, 1
paddd xmm4, xmm2
paddd xmm4, xmm0
jnz .mberror_loop
; Horizontal reduction of the four 32-bit lanes in xmm4:
; interleave the low and high lane pairs with zero, add them, then fold
; the upper 8 bytes onto the lower 8 bytes.
movdqa xmm0, xmm4
punpckldq xmm0, xmm5
punpckhdq xmm4, xmm5
paddd xmm0, xmm4
movdqa xmm1, xmm0
psrldq xmm0, 8
paddd xmm0, xmm1
; Low 32 bits hold the total; bits 32-63 are zero after the unpacks.
movq rax, xmm0
pop rdi
pop rsi
; begin epilog
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret

View File

@ -23,13 +23,6 @@ void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) {
vp9_short_fdct4x4_mmx(input + 4, output + 16, pitch);
}
/* Low-level MMX routine; it is not defined in this C file and operates on
 * raw coefficient pointers. */
int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr);
/* MMX entry point for the macroblock error computation.
 *
 * Extracts the coefficient buffer of block 0 and the dequantized
 * coefficient buffer of plane 0 from `mb`, forwards both to
 * vp9_mbblock_error_mmx_impl() and returns its result unchanged.
 */
int vp9_mbblock_error_mmx(MACROBLOCK *mb) {
short *coeff_ptr = mb->block[0].coeff;
short *dcoef_ptr = mb->e_mbd.plane[0].dqcoeff;
return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr);
}
void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride,
short *diff, unsigned char *predictor,
int pitch);
@ -44,13 +37,6 @@ void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) {
#endif
#if HAVE_SSE2
/* Low-level SSE2 routine; it is not defined in this C file and operates on
 * raw coefficient pointers. */
int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr);
/* SSE2 entry point for the macroblock error computation.
 *
 * Extracts the coefficient buffer of block 0 and the dequantized
 * coefficient buffer of plane 0 from `mb`, forwards both to
 * vp9_mbblock_error_xmm_impl() and returns its result unchanged.
 */
int vp9_mbblock_error_xmm(MACROBLOCK *mb) {
short *coeff_ptr = mb->block[0].coeff;
short *dcoef_ptr = mb->e_mbd.plane[0].dqcoeff;
return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr);
}
void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride,
short *diff, unsigned char *predictor,
int pitch);