Merge remote branch 'internal/upstream' into HEAD

This commit is contained in:
John Koleszar 2010-09-15 00:05:04 -04:00
commit b1879d9754

View File

@ -196,12 +196,12 @@
pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
psubsb xmm2, xmm7 ; p1 - q1 psubsb xmm2, xmm7 ; p1 - q1
pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1)
pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1)
pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
movdqa xmm3, xmm0 ; q0
movdqa xmm3, xmm0 ; q0
psubsb xmm0, xmm6 ; q0 - p0 psubsb xmm0, xmm6 ; q0 - p0
paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1) paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1) paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1)
@ -211,29 +211,28 @@
paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
pxor xmm0, xmm0 punpckhbw xmm5, xmm2 ; axbxcxdx
pxor xmm5, xmm5 punpcklbw xmm2, xmm2 ; exfxgxhx
punpcklbw xmm0, xmm2
punpckhbw xmm5, xmm2 psraw xmm5, 11 ; sign extended shift right by 3
psraw xmm0, 11 psraw xmm2, 11 ; sign extended shift right by 3
psraw xmm5, 11 packsswb xmm2, xmm5 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
packsswb xmm0, xmm5
movdqa xmm2, xmm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; punpcklbw xmm0, xmm1 ; exfxgxhx
punpckhbw xmm1, xmm1 ; axbxcxdx
pxor xmm0, xmm0 ; 0
movdqa xmm5, xmm1 ; abcdefgh
punpcklbw xmm0, xmm1 ; e0f0g0h0
psraw xmm0, 11 ; sign extended shift right by 3 psraw xmm0, 11 ; sign extended shift right by 3
pxor xmm1, xmm1 ; 0
punpckhbw xmm1, xmm5 ; a0b0c0d0
psraw xmm1, 11 ; sign extended shift right by 3 psraw xmm1, 11 ; sign extended shift right by 3
movdqa xmm5, xmm0 ; save results
movdqa xmm5, xmm0 ; save results
packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
paddsw xmm5, [ones GLOBAL] paddsw xmm5, [ones GLOBAL]
paddsw xmm1, [ones GLOBAL] paddsw xmm1, [ones GLOBAL]
psraw xmm5, 1 ; partial shifted one more time for 2nd tap psraw xmm5, 1 ; partial shifted one more time for 2nd tap
psraw xmm1, 1 ; partial shifted one more time for 2nd tap psraw xmm1, 1 ; partial shifted one more time for 2nd tap
packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
pandn xmm4, xmm5 ; high edge variance additive pandn xmm4, xmm5 ; high edge variance additive
%endmacro %endmacro
@ -433,29 +432,27 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
pand xmm2, xmm4; ; Filter2 = vp8_filter & hev pand xmm2, xmm4; ; Filter2 = vp8_filter & hev
movdqa xmm5, xmm2 movdqa xmm5, xmm2
paddsb xmm5, [t3 GLOBAL] paddsb xmm5, [t3 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 3)
punpckhbw xmm7, xmm5 ; axbxcxdx
punpcklbw xmm5, xmm5 ; exfxgxhx
pxor xmm0, xmm0 ; 0
pxor xmm7, xmm7 ; 0
punpcklbw xmm0, xmm5 ; e0f0g0h0
psraw xmm0, 11 ; sign extended shift right by 3
punpckhbw xmm7, xmm5 ; a0b0c0d0
psraw xmm7, 11 ; sign extended shift right by 3 psraw xmm7, 11 ; sign extended shift right by 3
packsswb xmm0, xmm7 ; Filter2 >>=3; psraw xmm5, 11 ; sign extended shift right by 3
movdqa xmm5, xmm0 ; Filter2
paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4) packsswb xmm5, xmm7 ; Filter2 >>=3;
paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
punpckhbw xmm7, xmm2 ; axbxcxdx
punpcklbw xmm0, xmm2 ; exfxgxhx
pxor xmm0, xmm0 ; 0
pxor xmm7, xmm7 ; 0
punpcklbw xmm0, xmm2 ; e0f0g0h0
psraw xmm0, 11 ; sign extended shift right by 3
punpckhbw xmm7, xmm2 ; a0b0c0d0
psraw xmm7, 11 ; sign extended shift right by 3 psraw xmm7, 11 ; sign extended shift right by 3
packsswb xmm0, xmm7 ; Filter2 >>=3; psraw xmm0, 11 ; sign extended shift right by 3
psubsb xmm3, xmm0 ; qs0 =qs0 - filter1 packsswb xmm0, xmm7 ; Filter2 >>=3;
paddsb xmm6, xmm5 ; ps0 =ps0 + Filter2 paddsb xmm6, xmm5 ; ps0 =ps0 + Filter2
psubsb xmm3, xmm0 ; qs0 =qs0 - filter1
pandn xmm4, xmm1 ; vp8_filter&=~hev pandn xmm4, xmm1 ; vp8_filter&=~hev
%endmacro %endmacro
@ -465,7 +462,6 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
; *oq0 = s^0x80; ; *oq0 = s^0x80;
; s = vp8_signed_char_clamp(ps0 + u); ; s = vp8_signed_char_clamp(ps0 + u);
; *op0 = s^0x80; ; *op0 = s^0x80;
pxor xmm0, xmm0
pxor xmm1, xmm1 pxor xmm1, xmm1
pxor xmm2, xmm2 pxor xmm2, xmm2
@ -1022,28 +1018,19 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
pxor xmm0, xmm0
pxor xmm5, xmm5
punpcklbw xmm0, xmm2
punpckhbw xmm5, xmm2 punpckhbw xmm5, xmm2
psraw xmm0, 11 punpcklbw xmm2, xmm2
psraw xmm5, 11 psraw xmm5, 11
packsswb xmm0, xmm5 psraw xmm2, 11
movdqa xmm2, xmm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; packsswb xmm2, xmm5 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
punpcklbw xmm0, xmm1 ; exfxgxhx
pxor xmm0, xmm0 ; 0 punpckhbw xmm1, xmm1 ; axbxcxdx
movdqa xmm5, xmm1 ; abcdefgh
punpcklbw xmm0, xmm1 ; e0f0g0h0
psraw xmm0, 11 ; sign extended shift right by 3 psraw xmm0, 11 ; sign extended shift right by 3
pxor xmm1, xmm1 ; 0
punpckhbw xmm1, xmm5 ; a0b0c0d0
psraw xmm1, 11 ; sign extended shift right by 3 psraw xmm1, 11 ; sign extended shift right by 3
movdqa xmm5, xmm0 ; save results movdqa xmm5, xmm0 ; save results
@ -1308,28 +1295,22 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
movdqa xmm5, xmm2 movdqa xmm5, xmm2
paddsb xmm5, [t3 GLOBAL] paddsb xmm5, [t3 GLOBAL]
pxor xmm0, xmm0 ; 0 punpckhbw xmm7, xmm5 ; axbxcxdx
pxor xmm7, xmm7 ; 0 punpcklbw xmm5, xmm5 ; exfxgxhx
punpcklbw xmm0, xmm5 ; e0f0g0h0
psraw xmm0, 11 ; sign extended shift right by 3
punpckhbw xmm7, xmm5 ; a0b0c0d0
psraw xmm7, 11 ; sign extended shift right by 3 psraw xmm7, 11 ; sign extended shift right by 3
psraw xmm5, 11 ; sign extended shift right by 3
packsswb xmm0, xmm7 ; Filter2 >>=3; packsswb xmm5, xmm7 ; Filter2 >>=3;
movdqa xmm5, xmm0 ; Filter2
paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4) paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
pxor xmm0, xmm0 ; 0
pxor xmm7, xmm7 ; 0 punpcklbw xmm0, xmm2 ; exfxgxhx
punpcklbw xmm0, xmm2 ; e0f0g0h0 punpckhbw xmm7, xmm2 ; axbxcxdx
psraw xmm0, 11 ; sign extended shift right by 3 psraw xmm0, 11 ; sign extended shift right by 3
punpckhbw xmm7, xmm2 ; a0b0c0d0
psraw xmm7, 11 ; sign extended shift right by 3 psraw xmm7, 11 ; sign extended shift right by 3
packsswb xmm0, xmm7 ; Filter2 >>=3; packsswb xmm0, xmm7 ; Filter2 >>=3;
psubsb xmm3, xmm0 ; qs0 =qs0 - filter1 psubsb xmm3, xmm0 ; qs0 =qs0 - filter1
@ -1344,7 +1325,6 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
; *oq0 = s^0x80; ; *oq0 = s^0x80;
; s = vp8_signed_char_clamp(ps0 + u); ; s = vp8_signed_char_clamp(ps0 + u);
; *op0 = s^0x80; ; *op0 = s^0x80;
pxor xmm0, xmm0
pxor xmm1, xmm1 pxor xmm1, xmm1
pxor xmm2, xmm2 pxor xmm2, xmm2