Merge remote branch 'internal/upstream' into HEAD

This commit is contained in:
John Koleszar 2010-12-11 00:05:08 -05:00
commit eb1c033731
4 changed files with 40 additions and 64 deletions

View File

@ -36,6 +36,14 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
7, 11, 14, 15, 7, 11, 14, 15,
}; };
/*
 * Inverse of the 4x4 zig-zag scan, stored 1-based: entry r is the scan
 * position (1..16) of the coefficient at raster index r, i.e.
 *     vp8_default_inv_zig_zag[vp8_default_zig_zag1d[i]] == i + 1.
 * NOTE(review): verified here only against the last row of
 * vp8_default_zig_zag1d (7, 11, 14, 15 -> 13, 14, 15, 16); confirm the
 * remaining entries against the full table.
 *
 * Each value 1..16 appears exactly once. The table is 16 shorts and is
 * declared 16-byte aligned, so it can be read as two aligned 128-bit words.
 * Presumably the 1-based numbering lets a SIMD max over the entries selected
 * by non-zero coefficients yield the end-of-block count directly, with 0
 * meaning "no non-zero coefficients" -- confirm against
 * vp8_fast_quantize_b_impl_sse2.
 */
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
{
1, 2, 6, 7,
3, 5, 8, 13,
4, 9, 12, 14,
10, 11, 15, 16
};
DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]); DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]);
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6}; const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};

View File

@ -95,6 +95,7 @@ struct VP8Common;
void vp8_default_coef_probs(struct VP8Common *); void vp8_default_coef_probs(struct VP8Common *);
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]); extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
extern short vp8_default_zig_zag_mask[16]; extern short vp8_default_zig_zag_mask[16];
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX]; extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];

View File

@ -253,10 +253,9 @@ rq_zigzag_1c:
pop rbp pop rbp
ret ret
;int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr, ;int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
; short *qcoeff_ptr,short *dequant_ptr, ; short *qcoeff_ptr,short *dequant_ptr,
; short *scan_mask, short *round_ptr, ; short *inv_scan_order, short *round_ptr,
; short *quant_ptr, short *dqcoeff_ptr); ; short *quant_ptr, short *dqcoeff_ptr);
global sym(vp8_fast_quantize_b_impl_sse2) global sym(vp8_fast_quantize_b_impl_sse2)
sym(vp8_fast_quantize_b_impl_sse2): sym(vp8_fast_quantize_b_impl_sse2):
@ -265,32 +264,18 @@ sym(vp8_fast_quantize_b_impl_sse2):
SHADOW_ARGS_TO_STACK 7 SHADOW_ARGS_TO_STACK 7
push rsi push rsi
push rdi push rdi
push rbx
; end prolog ; end prolog
ALIGN_STACK 16, rax
%define save_xmm6 0
%define save_xmm7 16
%define vp8_fastquantizeb_stack_size save_xmm7 + 16
sub rsp, vp8_fastquantizeb_stack_size
movdqa XMMWORD PTR[rsp + save_xmm6], xmm6
movdqa XMMWORD PTR[rsp + save_xmm7], xmm7
mov rdx, arg(0) ;coeff_ptr mov rdx, arg(0) ;coeff_ptr
mov rcx, arg(2) ;dequant_ptr mov rcx, arg(2) ;dequant_ptr
mov rax, arg(3) ;scan_mask
mov rdi, arg(4) ;round_ptr mov rdi, arg(4) ;round_ptr
mov rsi, arg(5) ;quant_ptr mov rsi, arg(5) ;quant_ptr
movdqa xmm0, XMMWORD PTR[rdx] movdqa xmm0, XMMWORD PTR[rdx]
movdqa xmm4, XMMWORD PTR[rdx + 16] movdqa xmm4, XMMWORD PTR[rdx + 16]
movdqa xmm6, XMMWORD PTR[rdi] ;round lo movdqa xmm2, XMMWORD PTR[rdi] ;round lo
movdqa xmm7, XMMWORD PTR[rdi + 16] ;round hi movdqa xmm3, XMMWORD PTR[rdi + 16] ;round hi
movdqa xmm1, xmm0 movdqa xmm1, xmm0
movdqa xmm5, xmm4 movdqa xmm5, xmm4
@ -303,8 +288,8 @@ sym(vp8_fast_quantize_b_impl_sse2):
psubw xmm1, xmm0 ;x = abs(z) psubw xmm1, xmm0 ;x = abs(z)
psubw xmm5, xmm4 ;x = abs(z) psubw xmm5, xmm4 ;x = abs(z)
paddw xmm1, xmm6 paddw xmm1, xmm2
paddw xmm5, xmm7 paddw xmm5, xmm3
pmulhw xmm1, XMMWORD PTR[rsi] pmulhw xmm1, XMMWORD PTR[rsi]
pmulhw xmm5, XMMWORD PTR[rsi + 16] pmulhw xmm5, XMMWORD PTR[rsi + 16]
@ -312,8 +297,8 @@ sym(vp8_fast_quantize_b_impl_sse2):
mov rdi, arg(1) ;qcoeff_ptr mov rdi, arg(1) ;qcoeff_ptr
mov rsi, arg(6) ;dqcoeff_ptr mov rsi, arg(6) ;dqcoeff_ptr
movdqa xmm6, XMMWORD PTR[rcx] movdqa xmm2, XMMWORD PTR[rcx]
movdqa xmm7, XMMWORD PTR[rcx + 16] movdqa xmm3, XMMWORD PTR[rcx + 16]
pxor xmm1, xmm0 pxor xmm1, xmm0
pxor xmm5, xmm4 pxor xmm5, xmm4
@ -323,64 +308,47 @@ sym(vp8_fast_quantize_b_impl_sse2):
movdqa XMMWORD PTR[rdi], xmm1 movdqa XMMWORD PTR[rdi], xmm1
movdqa XMMWORD PTR[rdi + 16], xmm5 movdqa XMMWORD PTR[rdi + 16], xmm5
pmullw xmm6, xmm1 pmullw xmm2, xmm1
pmullw xmm7, xmm5 pmullw xmm3, xmm5
movdqa xmm2, XMMWORD PTR[rax] mov rdi, arg(3) ;inv_scan_order
movdqa xmm3, XMMWORD PTR[rax+16];
pxor xmm4, xmm4 ;clear all bits ; Start with 16
pxor xmm4, xmm4 ;clear all bits
pcmpeqw xmm1, xmm4 pcmpeqw xmm1, xmm4
pcmpeqw xmm5, xmm4 pcmpeqw xmm5, xmm4
pcmpeqw xmm4, xmm4 ;set all bits pcmpeqw xmm4, xmm4 ;set all bits
pxor xmm1, xmm4 pxor xmm1, xmm4
pxor xmm5, xmm4 pxor xmm5, xmm4
psrlw xmm1, 15 pand xmm1, XMMWORD PTR[rdi]
psrlw xmm5, 15 pand xmm5, XMMWORD PTR[rdi+16]
pmaddwd xmm1, xmm2 pmaxsw xmm1, xmm5
pmaddwd xmm5, xmm3
movq xmm2, xmm1 ; now down to 8
movq xmm3, xmm5 pshufd xmm5, xmm1, 00001110b
psrldq xmm1, 8 pmaxsw xmm1, xmm5
psrldq xmm5, 8
paddd xmm1, xmm5 ; only 4 left
paddd xmm2, xmm3 pshuflw xmm5, xmm1, 00001110b
paddd xmm1, xmm2 pmaxsw xmm1, xmm5
movq xmm5, xmm1
psrldq xmm1, 4 ; okay, just 2!
paddd xmm5, xmm1 pshuflw xmm5, xmm1, 00000001b
movq rcx, xmm5 pmaxsw xmm1, xmm5
and rcx, 0xffff
xor rdx, rdx movd rax, xmm1
sub rdx, rcx and rax, 0xff
bsr rax, rcx movdqa XMMWORD PTR[rsi], xmm2 ;store dqcoeff
inc rax movdqa XMMWORD PTR[rsi + 16], xmm3 ;store dqcoeff
sar rdx, 31
and rax, rdx
movdqa XMMWORD PTR[rsi], xmm6 ;store dqcoeff
movdqa XMMWORD PTR[rsi + 16], xmm7 ;store dqcoeff
movdqa xmm6, XMMWORD PTR[rsp + save_xmm6]
movdqa xmm7, XMMWORD PTR[rsp + save_xmm7]
add rsp, vp8_fastquantizeb_stack_size
pop rsp
; begin epilog ; begin epilog
pop rbx
pop rdi pop rdi
pop rsi pop rsi
UNSHADOW_ARGS UNSHADOW_ARGS

View File

@ -83,7 +83,7 @@ void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
#if HAVE_SSE2 #if HAVE_SSE2
int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr, int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
short *qcoeff_ptr, short *dequant_ptr, short *qcoeff_ptr, short *dequant_ptr,
short *scan_mask, short *round_ptr, const short *inv_scan_order, short *round_ptr,
short *quant_ptr, short *dqcoeff_ptr); short *quant_ptr, short *dqcoeff_ptr);
void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d) void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
{ {
@ -99,8 +99,7 @@ void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
coeff_ptr, coeff_ptr,
qcoeff_ptr, qcoeff_ptr,
dequant_ptr, dequant_ptr,
scan_mask, vp8_default_inv_zig_zag,
round_ptr, round_ptr,
quant_ptr, quant_ptr,
dqcoeff_ptr dqcoeff_ptr