Merge remote branch 'internal/upstream' into HEAD
This commit is contained in:
commit
1805223162
@ -1208,8 +1208,8 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
|
||||
unsigned char *check_here;
|
||||
int thissad;
|
||||
|
||||
int ref_row = ref_mv->row >> 3;
|
||||
int ref_col = ref_mv->col >> 3;
|
||||
int ref_row = ref_mv->row;
|
||||
int ref_col = ref_mv->col;
|
||||
|
||||
int row_min = ref_row - distance;
|
||||
int row_max = ref_row + distance;
|
||||
@ -1303,8 +1303,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
unsigned char *check_here;
|
||||
unsigned int thissad;
|
||||
|
||||
int ref_row = ref_mv->row >> 3;
|
||||
int ref_col = ref_mv->col >> 3;
|
||||
int ref_row = ref_mv->row;
|
||||
int ref_col = ref_mv->col;
|
||||
|
||||
int row_min = ref_row - distance;
|
||||
int row_max = ref_row + distance;
|
||||
@ -1431,8 +1431,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
unsigned char *check_here;
|
||||
unsigned int thissad;
|
||||
|
||||
int ref_row = ref_mv->row >> 3;
|
||||
int ref_col = ref_mv->col >> 3;
|
||||
int ref_row = ref_mv->row;
|
||||
int ref_col = ref_mv->col;
|
||||
|
||||
int row_min = ref_row - distance;
|
||||
int row_max = ref_row + distance;
|
||||
|
@ -2210,10 +2210,6 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
{
|
||||
int thissme;
|
||||
int full_flag_thresh = 0;
|
||||
MV full_mvp;
|
||||
|
||||
full_mvp.row = d->bmi.mv.as_mv.row <<3; // use diamond search result as full search staring point
|
||||
full_mvp.col = d->bmi.mv.as_mv.col <<3;
|
||||
|
||||
// Update x->vector_range based on best vector found in step search
|
||||
search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col));
|
||||
@ -2232,7 +2228,8 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
|
||||
{
|
||||
int sadpb = x->sadperbit16 >> 2;
|
||||
thissme = cpi->full_search_sad(x, b, d, &full_mvp, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
|
||||
/* use diamond search result as full search starting point */
|
||||
thissme = cpi->full_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
|
||||
}
|
||||
|
||||
// Barrier threshold to initiating full search
|
||||
|
@ -233,72 +233,97 @@ ZIGZAG_LOOP 15
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; int vp8_fast_quantize_b_impl_sse2 | arg
|
||||
; (short *coeff_ptr, | 0
|
||||
; short *qcoeff_ptr, | 1
|
||||
; short *dequant_ptr, | 2
|
||||
; short *inv_scan_order, | 3
|
||||
; short *round_ptr, | 4
|
||||
; short *quant_ptr, | 5
|
||||
; short *dqcoeff_ptr) | 6
|
||||
; void vp8_fast_quantize_b_sse2 | arg
|
||||
; (BLOCK *b, | 0
|
||||
; BLOCKD *d) | 1
|
||||
|
||||
global sym(vp8_fast_quantize_b_impl_sse2)
|
||||
sym(vp8_fast_quantize_b_impl_sse2):
|
||||
global sym(vp8_fast_quantize_b_sse2)
|
||||
sym(vp8_fast_quantize_b_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
push rsi
|
||||
GET_GOT rbx
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
push rdi
|
||||
push rsi
|
||||
%else
|
||||
%ifidn __OUTPUT_FORMAT__,x64
|
||||
push rdi
|
||||
push rsi
|
||||
%else
|
||||
; these registers are used for passing arguments
|
||||
%endif
|
||||
%endif
|
||||
|
||||
; end prolog
|
||||
|
||||
mov rdx, arg(0) ;coeff_ptr
|
||||
mov rcx, arg(2) ;dequant_ptr
|
||||
mov rdi, arg(4) ;round_ptr
|
||||
mov rsi, arg(5) ;quant_ptr
|
||||
%if ABI_IS_32BIT
|
||||
mov rdi, arg(0) ; BLOCK *b
|
||||
mov rsi, arg(1) ; BLOCKD *d
|
||||
%else
|
||||
%ifidn __OUTPUT_FORMAT__,x64
|
||||
mov rdi, rcx ; BLOCK *b
|
||||
mov rsi, rdx ; BLOCKD *d
|
||||
%else
|
||||
;mov rdi, rdi ; BLOCK *b
|
||||
;mov rsi, rsi ; BLOCKD *d
|
||||
%endif
|
||||
%endif
|
||||
|
||||
movdqa xmm0, XMMWORD PTR[rdx]
|
||||
movdqa xmm4, XMMWORD PTR[rdx + 16]
|
||||
mov rax, [rdi + vp8_block_coeff]
|
||||
mov rcx, [rdi + vp8_block_round]
|
||||
mov rdx, [rdi + vp8_block_quant_fast]
|
||||
|
||||
movdqa xmm2, XMMWORD PTR[rdi] ;round lo
|
||||
movdqa xmm3, XMMWORD PTR[rdi + 16] ;round hi
|
||||
; z = coeff
|
||||
movdqa xmm0, [rax]
|
||||
movdqa xmm4, [rax + 16]
|
||||
|
||||
; dup z so we can save sz
|
||||
movdqa xmm1, xmm0
|
||||
movdqa xmm5, xmm4
|
||||
|
||||
psraw xmm0, 15 ;sign of z (aka sz)
|
||||
psraw xmm4, 15 ;sign of z (aka sz)
|
||||
|
||||
pxor xmm1, xmm0
|
||||
pxor xmm5, xmm4
|
||||
psubw xmm1, xmm0 ;x = abs(z)
|
||||
psubw xmm5, xmm4 ;x = abs(z)
|
||||
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm5, xmm3
|
||||
|
||||
pmulhw xmm1, XMMWORD PTR[rsi]
|
||||
pmulhw xmm5, XMMWORD PTR[rsi + 16]
|
||||
|
||||
mov rdi, arg(1) ;qcoeff_ptr
|
||||
mov rsi, arg(6) ;dqcoeff_ptr
|
||||
|
||||
movdqa xmm2, XMMWORD PTR[rcx]
|
||||
movdqa xmm3, XMMWORD PTR[rcx + 16]
|
||||
; sz = z >> 15
|
||||
psraw xmm0, 15
|
||||
psraw xmm4, 15
|
||||
|
||||
; x = abs(z) = (z ^ sz) - sz
|
||||
pxor xmm1, xmm0
|
||||
pxor xmm5, xmm4
|
||||
psubw xmm1, xmm0
|
||||
psubw xmm5, xmm4
|
||||
|
||||
movdqa XMMWORD PTR[rdi], xmm1
|
||||
movdqa XMMWORD PTR[rdi + 16], xmm5
|
||||
; x += round
|
||||
paddw xmm1, [rcx]
|
||||
paddw xmm5, [rcx + 16]
|
||||
|
||||
pmullw xmm2, xmm1
|
||||
pmullw xmm3, xmm5
|
||||
mov rax, [rsi + vp8_blockd_qcoeff]
|
||||
mov rcx, [rsi + vp8_blockd_dequant]
|
||||
mov rdi, [rsi + vp8_blockd_dqcoeff]
|
||||
|
||||
mov rdi, arg(3) ;inv_scan_order
|
||||
; y = x * quant >> 16
|
||||
pmulhw xmm1, [rdx]
|
||||
pmulhw xmm5, [rdx + 16]
|
||||
|
||||
; x = (y ^ sz) - sz
|
||||
pxor xmm1, xmm0
|
||||
pxor xmm5, xmm4
|
||||
psubw xmm1, xmm0
|
||||
psubw xmm5, xmm4
|
||||
|
||||
; qcoeff = x
|
||||
movdqa [rax], xmm1
|
||||
movdqa [rax + 16], xmm5
|
||||
|
||||
; x * dequant
|
||||
movdqa xmm2, xmm1
|
||||
movdqa xmm3, xmm5
|
||||
pmullw xmm2, [rcx]
|
||||
pmullw xmm3, [rcx + 16]
|
||||
|
||||
; dqcoeff = x * dequant
|
||||
movdqa [rdi], xmm2
|
||||
movdqa [rdi + 16], xmm3
|
||||
|
||||
; Start with 16
|
||||
pxor xmm4, xmm4 ;clear all bits
|
||||
pcmpeqw xmm1, xmm4
|
||||
pcmpeqw xmm5, xmm4
|
||||
@ -307,8 +332,8 @@ sym(vp8_fast_quantize_b_impl_sse2):
|
||||
pxor xmm1, xmm4
|
||||
pxor xmm5, xmm4
|
||||
|
||||
pand xmm1, XMMWORD PTR[rdi]
|
||||
pand xmm5, XMMWORD PTR[rdi+16]
|
||||
pand xmm1, [GLOBAL(inv_zig_zag)]
|
||||
pand xmm5, [GLOBAL(inv_zig_zag + 16)]
|
||||
|
||||
pmaxsw xmm1, xmm5
|
||||
|
||||
@ -327,16 +352,22 @@ sym(vp8_fast_quantize_b_impl_sse2):
|
||||
|
||||
pmaxsw xmm1, xmm5
|
||||
|
||||
movd rax, xmm1
|
||||
and rax, 0xff
|
||||
|
||||
movdqa XMMWORD PTR[rsi], xmm2 ;store dqcoeff
|
||||
movdqa XMMWORD PTR[rsi + 16], xmm3 ;store dqcoeff
|
||||
movd eax, xmm1
|
||||
and eax, 0xff
|
||||
mov [rsi + vp8_blockd_eob], eax
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
%if ABI_IS_32BIT
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rdi
|
||||
%else
|
||||
%ifidn __OUTPUT_FORMAT__,x64
|
||||
pop rsi
|
||||
pop rdi
|
||||
%endif
|
||||
%endif
|
||||
|
||||
RESTORE_GOT
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
@ -24,12 +24,16 @@
|
||||
|
||||
#if HAVE_SSE2
|
||||
extern prototype_quantize_block(vp8_regular_quantize_b_sse2);
|
||||
extern prototype_quantize_block(vp8_fast_quantize_b_sse2);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
#undef vp8_quantize_quantb
|
||||
#define vp8_quantize_quantb vp8_regular_quantize_b_sse2
|
||||
|
||||
#undef vp8_quantize_fastquantb
|
||||
#define vp8_quantize_fastquantb vp8_fast_quantize_b_sse2
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -81,31 +81,6 @@ static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2
|
||||
int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
|
||||
short *qcoeff_ptr, short *dequant_ptr,
|
||||
const short *inv_scan_order, short *round_ptr,
|
||||
short *quant_ptr, short *dqcoeff_ptr);
|
||||
static void fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
|
||||
short *coeff_ptr = b->coeff;
|
||||
short *round_ptr = b->round;
|
||||
short *quant_ptr = b->quant_fast;
|
||||
short *qcoeff_ptr = d->qcoeff;
|
||||
short *dqcoeff_ptr = d->dqcoeff;
|
||||
short *dequant_ptr = d->dequant;
|
||||
|
||||
d->eob = vp8_fast_quantize_b_impl_sse2(
|
||||
coeff_ptr,
|
||||
qcoeff_ptr,
|
||||
dequant_ptr,
|
||||
vp8_default_inv_zig_zag,
|
||||
round_ptr,
|
||||
quant_ptr,
|
||||
dqcoeff_ptr
|
||||
);
|
||||
}
|
||||
|
||||
int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
|
||||
static int mbblock_error_xmm(MACROBLOCK *mb, int dc)
|
||||
{
|
||||
@ -294,7 +269,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2;
|
||||
|
||||
cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;
|
||||
cpi->rtcd.quantize.fastquantb = fast_quantize_b_sse2;
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2;
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
|
||||
|
Loading…
Reference in New Issue
Block a user