Make RD superblock mode search size-agnostic.
Merge the various super_block_yrd and super_block_uvrd versions into one common function that works for all sizes. Make transform size selection size-agnostic as well. This fixes a minor bug in the intra UV superblock code, which used the wrong transform size for txsz > 8x8, and stores the txsz selection for superblocks properly (instead of forgetting it). Lastly, it removes the trellis search that was done for 16x16 intra predictors, since trellis is relatively expensive and should therefore only be done after RD mode selection. Results on derf are essentially identical (+0.009%). Change-Id: If4485c6f0a0fe4038b3172f7a238477c35a6f8d3
This commit is contained in:
parent
a4579e04c9
commit
b4f6098ef7
@ -583,9 +583,6 @@ specialize vp9_sub_pixel_mse32x32
|
||||
prototype unsigned int vp9_get_mb_ss "const int16_t *"
|
||||
specialize vp9_get_mb_ss mmx sse2
|
||||
# ENCODEMB INVOKE
|
||||
prototype int vp9_mbblock_error "struct macroblock *mb"
|
||||
specialize vp9_mbblock_error mmx sse2
|
||||
vp9_mbblock_error_sse2=vp9_mbblock_error_xmm
|
||||
|
||||
prototype int vp9_block_error "int16_t *coeff, int16_t *dqcoeff, int block_size"
|
||||
specialize vp9_block_error mmx sse2
|
||||
@ -594,9 +591,6 @@ vp9_block_error_sse2=vp9_block_error_xmm
|
||||
prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
|
||||
specialize vp9_subtract_b mmx sse2
|
||||
|
||||
prototype int vp9_mbuverror "struct macroblock *mb"
|
||||
specialize vp9_mbuverror
|
||||
|
||||
prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
|
||||
specialize vp9_subtract_b mmx sse2
|
||||
|
||||
|
@ -840,15 +840,15 @@ static void pick_sb_modes(VP9_COMP *cpi,
|
||||
/* Find best coding mode & reconstruct the MB so it is available
|
||||
* as a predictor for MBs that follow in the SB */
|
||||
if (cm->frame_type == KEY_FRAME) {
|
||||
vp9_rd_pick_intra_mode_sb32(cpi, x,
|
||||
totalrate,
|
||||
totaldist);
|
||||
vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist,
|
||||
BLOCK_SIZE_SB32X32);
|
||||
|
||||
/* Save the coding context */
|
||||
vpx_memcpy(&x->sb32_context[xd->sb_index].mic, xd->mode_info_context,
|
||||
sizeof(MODE_INFO));
|
||||
} else {
|
||||
vp9_rd_pick_inter_mode_sb32(cpi, x, mb_row, mb_col, totalrate, totaldist);
|
||||
vp9_rd_pick_inter_mode_sb(cpi, x, mb_row, mb_col, totalrate, totaldist,
|
||||
BLOCK_SIZE_SB32X32);
|
||||
}
|
||||
}
|
||||
|
||||
@ -870,12 +870,14 @@ static void pick_sb64_modes(VP9_COMP *cpi,
|
||||
/* Find best coding mode & reconstruct the MB so it is available
|
||||
* as a predictor for MBs that follow in the SB */
|
||||
if (cm->frame_type == KEY_FRAME) {
|
||||
vp9_rd_pick_intra_mode_sb64(cpi, x, totalrate, totaldist);
|
||||
vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist,
|
||||
BLOCK_SIZE_SB64X64);
|
||||
|
||||
/* Save the coding context */
|
||||
vpx_memcpy(&x->sb64_context.mic, xd->mode_info_context, sizeof(MODE_INFO));
|
||||
} else {
|
||||
vp9_rd_pick_inter_mode_sb64(cpi, x, mb_row, mb_col, totalrate, totaldist);
|
||||
vp9_rd_pick_inter_mode_sb(cpi, x, mb_row, mb_col, totalrate, totaldist,
|
||||
BLOCK_SIZE_SB64X64);
|
||||
}
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -22,23 +22,16 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex);
|
||||
void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int *r, int *d);
|
||||
|
||||
void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int *r, int *d);
|
||||
|
||||
void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int *r, int *d);
|
||||
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int *r, int *d, BLOCK_SIZE_TYPE bsize);
|
||||
|
||||
void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int mb_row, int mb_col,
|
||||
int *r, int *d);
|
||||
|
||||
int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int mb_row, int mb_col,
|
||||
int *r, int *d);
|
||||
|
||||
int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int mb_row, int mb_col,
|
||||
int *r, int *d);
|
||||
int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
int mb_row, int mb_col,
|
||||
int *r, int *d, BLOCK_SIZE_TYPE bsize);
|
||||
|
||||
void vp9_init_me_luts();
|
||||
|
||||
|
@ -123,140 +123,3 @@ sym(vp9_block_error_mmx):
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr);
|
||||
global sym(vp9_mbblock_error_mmx_impl) PRIVATE
|
||||
sym(vp9_mbblock_error_mmx_impl):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
|
||||
mov rsi, arg(0) ;coeff_ptr
|
||||
pxor mm7, mm7
|
||||
|
||||
mov rdi, arg(1) ;dcoef_ptr
|
||||
pxor mm2, mm2
|
||||
|
||||
mov rcx, 16
|
||||
|
||||
.mberror_loop_mmx:
|
||||
movq mm3, [rsi]
|
||||
movq mm4, [rdi]
|
||||
|
||||
movq mm5, [rsi+8]
|
||||
movq mm6, [rdi+8]
|
||||
|
||||
|
||||
psubw mm5, mm6
|
||||
pmaddwd mm5, mm5
|
||||
|
||||
psubw mm3, mm4
|
||||
|
||||
pmaddwd mm3, mm3
|
||||
paddd mm2, mm5
|
||||
|
||||
paddd mm2, mm3
|
||||
movq mm3, [rsi+16]
|
||||
|
||||
movq mm4, [rdi+16]
|
||||
movq mm5, [rsi+24]
|
||||
|
||||
movq mm6, [rdi+24]
|
||||
psubw mm5, mm6
|
||||
|
||||
pmaddwd mm5, mm5
|
||||
psubw mm3, mm4
|
||||
|
||||
pmaddwd mm3, mm3
|
||||
paddd mm2, mm5
|
||||
|
||||
paddd mm2, mm3
|
||||
add rsi, 32
|
||||
|
||||
add rdi, 32
|
||||
sub rcx, 1
|
||||
|
||||
jnz .mberror_loop_mmx
|
||||
|
||||
movq mm0, mm2
|
||||
psrlq mm2, 32
|
||||
|
||||
paddd mm0, mm2
|
||||
movq rax, mm0
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
; begin epilog
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr);
|
||||
global sym(vp9_mbblock_error_xmm_impl) PRIVATE
|
||||
sym(vp9_mbblock_error_xmm_impl):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
SAVE_XMM 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
|
||||
mov rsi, arg(0) ;coeff_ptr
|
||||
pxor xmm5, xmm5
|
||||
|
||||
mov rdi, arg(1) ;dcoef_ptr
|
||||
pxor xmm4, xmm4
|
||||
|
||||
mov rcx, 16
|
||||
|
||||
.mberror_loop:
|
||||
movdqa xmm0, [rsi]
|
||||
movdqa xmm1, [rdi]
|
||||
|
||||
movdqa xmm2, [rsi+16]
|
||||
movdqa xmm3, [rdi+16]
|
||||
|
||||
|
||||
psubw xmm2, xmm3
|
||||
pmaddwd xmm2, xmm2
|
||||
|
||||
psubw xmm0, xmm1
|
||||
|
||||
pmaddwd xmm0, xmm0
|
||||
add rsi, 32
|
||||
|
||||
add rdi, 32
|
||||
|
||||
sub rcx, 1
|
||||
paddd xmm4, xmm2
|
||||
|
||||
paddd xmm4, xmm0
|
||||
jnz .mberror_loop
|
||||
|
||||
movdqa xmm0, xmm4
|
||||
punpckldq xmm0, xmm5
|
||||
|
||||
punpckhdq xmm4, xmm5
|
||||
paddd xmm0, xmm4
|
||||
|
||||
movdqa xmm1, xmm0
|
||||
psrldq xmm0, 8
|
||||
|
||||
paddd xmm0, xmm1
|
||||
movq rax, xmm0
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
; begin epilog
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -23,13 +23,6 @@ void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) {
|
||||
vp9_short_fdct4x4_mmx(input + 4, output + 16, pitch);
|
||||
}
|
||||
|
||||
int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr);
|
||||
int vp9_mbblock_error_mmx(MACROBLOCK *mb) {
|
||||
short *coeff_ptr = mb->block[0].coeff;
|
||||
short *dcoef_ptr = mb->e_mbd.plane[0].dqcoeff;
|
||||
return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr);
|
||||
}
|
||||
|
||||
void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride,
|
||||
short *diff, unsigned char *predictor,
|
||||
int pitch);
|
||||
@ -44,13 +37,6 @@ void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) {
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2
|
||||
int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr);
|
||||
int vp9_mbblock_error_xmm(MACROBLOCK *mb) {
|
||||
short *coeff_ptr = mb->block[0].coeff;
|
||||
short *dcoef_ptr = mb->e_mbd.plane[0].dqcoeff;
|
||||
return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr);
|
||||
}
|
||||
|
||||
void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride,
|
||||
short *diff, unsigned char *predictor,
|
||||
int pitch);
|
||||
|
Loading…
Reference in New Issue
Block a user