Merge remote branch 'internal/upstream' into HEAD
This commit is contained in:
commit
b1879d9754
@ -196,12 +196,12 @@
|
|||||||
pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
|
pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values
|
||||||
|
|
||||||
psubsb xmm2, xmm7 ; p1 - q1
|
psubsb xmm2, xmm7 ; p1 - q1
|
||||||
pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1)
|
|
||||||
pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
|
pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values
|
||||||
|
|
||||||
|
pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1)
|
||||||
pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
|
pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values
|
||||||
movdqa xmm3, xmm0 ; q0
|
|
||||||
|
|
||||||
|
movdqa xmm3, xmm0 ; q0
|
||||||
psubsb xmm0, xmm6 ; q0 - p0
|
psubsb xmm0, xmm6 ; q0 - p0
|
||||||
paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
|
paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
|
||||||
paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1)
|
paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1)
|
||||||
@ -211,29 +211,28 @@
|
|||||||
paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
|
paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
|
||||||
paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
|
paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
|
||||||
|
|
||||||
pxor xmm0, xmm0
|
punpckhbw xmm5, xmm2 ; axbxcxdx
|
||||||
pxor xmm5, xmm5
|
punpcklbw xmm2, xmm2 ; exfxgxhx
|
||||||
punpcklbw xmm0, xmm2
|
|
||||||
punpckhbw xmm5, xmm2
|
psraw xmm5, 11 ; sign extended shift right by 3
|
||||||
psraw xmm0, 11
|
psraw xmm2, 11 ; sign extended shift right by 3
|
||||||
psraw xmm5, 11
|
packsswb xmm2, xmm5 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
|
||||||
packsswb xmm0, xmm5
|
|
||||||
movdqa xmm2, xmm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
|
punpcklbw xmm0, xmm1 ; exfxgxhx
|
||||||
|
punpckhbw xmm1, xmm1 ; axbxcxdx
|
||||||
|
|
||||||
pxor xmm0, xmm0 ; 0
|
|
||||||
movdqa xmm5, xmm1 ; abcdefgh
|
|
||||||
punpcklbw xmm0, xmm1 ; e0f0g0h0
|
|
||||||
psraw xmm0, 11 ; sign extended shift right by 3
|
psraw xmm0, 11 ; sign extended shift right by 3
|
||||||
pxor xmm1, xmm1 ; 0
|
|
||||||
punpckhbw xmm1, xmm5 ; a0b0c0d0
|
|
||||||
psraw xmm1, 11 ; sign extended shift right by 3
|
psraw xmm1, 11 ; sign extended shift right by 3
|
||||||
movdqa xmm5, xmm0 ; save results
|
|
||||||
|
|
||||||
|
movdqa xmm5, xmm0 ; save results
|
||||||
packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
|
packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
|
||||||
|
|
||||||
paddsw xmm5, [ones GLOBAL]
|
paddsw xmm5, [ones GLOBAL]
|
||||||
paddsw xmm1, [ones GLOBAL]
|
paddsw xmm1, [ones GLOBAL]
|
||||||
|
|
||||||
psraw xmm5, 1 ; partial shifted one more time for 2nd tap
|
psraw xmm5, 1 ; partial shifted one more time for 2nd tap
|
||||||
psraw xmm1, 1 ; partial shifted one more time for 2nd tap
|
psraw xmm1, 1 ; partial shifted one more time for 2nd tap
|
||||||
|
|
||||||
packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
|
packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
|
||||||
pandn xmm4, xmm5 ; high edge variance additive
|
pandn xmm4, xmm5 ; high edge variance additive
|
||||||
%endmacro
|
%endmacro
|
||||||
@ -433,29 +432,27 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
|||||||
pand xmm2, xmm4; ; Filter2 = vp8_filter & hev
|
pand xmm2, xmm4; ; Filter2 = vp8_filter & hev
|
||||||
|
|
||||||
movdqa xmm5, xmm2
|
movdqa xmm5, xmm2
|
||||||
paddsb xmm5, [t3 GLOBAL]
|
paddsb xmm5, [t3 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 3)
|
||||||
|
|
||||||
|
punpckhbw xmm7, xmm5 ; axbxcxdx
|
||||||
|
punpcklbw xmm5, xmm5 ; exfxgxhx
|
||||||
|
|
||||||
pxor xmm0, xmm0 ; 0
|
|
||||||
pxor xmm7, xmm7 ; 0
|
|
||||||
punpcklbw xmm0, xmm5 ; e0f0g0h0
|
|
||||||
psraw xmm0, 11 ; sign extended shift right by 3
|
|
||||||
punpckhbw xmm7, xmm5 ; a0b0c0d0
|
|
||||||
psraw xmm7, 11 ; sign extended shift right by 3
|
psraw xmm7, 11 ; sign extended shift right by 3
|
||||||
packsswb xmm0, xmm7 ; Filter2 >>=3;
|
psraw xmm5, 11 ; sign extended shift right by 3
|
||||||
movdqa xmm5, xmm0 ; Filter2
|
|
||||||
paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
|
packsswb xmm5, xmm7 ; Filter2 >>=3;
|
||||||
|
paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
|
||||||
|
|
||||||
|
punpckhbw xmm7, xmm2 ; axbxcxdx
|
||||||
|
punpcklbw xmm0, xmm2 ; exfxgxhx
|
||||||
|
|
||||||
pxor xmm0, xmm0 ; 0
|
|
||||||
pxor xmm7, xmm7 ; 0
|
|
||||||
punpcklbw xmm0, xmm2 ; e0f0g0h0
|
|
||||||
psraw xmm0, 11 ; sign extended shift right by 3
|
|
||||||
punpckhbw xmm7, xmm2 ; a0b0c0d0
|
|
||||||
psraw xmm7, 11 ; sign extended shift right by 3
|
psraw xmm7, 11 ; sign extended shift right by 3
|
||||||
packsswb xmm0, xmm7 ; Filter2 >>=3;
|
psraw xmm0, 11 ; sign extended shift right by 3
|
||||||
|
|
||||||
psubsb xmm3, xmm0 ; qs0 =qs0 - filter1
|
packsswb xmm0, xmm7 ; Filter2 >>=3;
|
||||||
paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2
|
paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2
|
||||||
|
|
||||||
|
psubsb xmm3, xmm0 ; qs0 =qs0 - filter1
|
||||||
pandn xmm4, xmm1 ; vp8_filter&=~hev
|
pandn xmm4, xmm1 ; vp8_filter&=~hev
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
@ -465,7 +462,6 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
|||||||
; *oq0 = s^0x80;
|
; *oq0 = s^0x80;
|
||||||
; s = vp8_signed_char_clamp(ps0 + u);
|
; s = vp8_signed_char_clamp(ps0 + u);
|
||||||
; *op0 = s^0x80;
|
; *op0 = s^0x80;
|
||||||
pxor xmm0, xmm0
|
|
||||||
pxor xmm1, xmm1
|
pxor xmm1, xmm1
|
||||||
|
|
||||||
pxor xmm2, xmm2
|
pxor xmm2, xmm2
|
||||||
@ -1022,28 +1018,19 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||||||
paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
|
paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
|
||||||
|
|
||||||
paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
|
paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
|
||||||
pxor xmm0, xmm0
|
|
||||||
|
|
||||||
pxor xmm5, xmm5
|
|
||||||
punpcklbw xmm0, xmm2
|
|
||||||
|
|
||||||
punpckhbw xmm5, xmm2
|
punpckhbw xmm5, xmm2
|
||||||
psraw xmm0, 11
|
punpcklbw xmm2, xmm2
|
||||||
|
|
||||||
psraw xmm5, 11
|
psraw xmm5, 11
|
||||||
packsswb xmm0, xmm5
|
psraw xmm2, 11
|
||||||
|
|
||||||
movdqa xmm2, xmm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
|
packsswb xmm2, xmm5 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
|
||||||
|
punpcklbw xmm0, xmm1 ; exfxgxhx
|
||||||
|
|
||||||
pxor xmm0, xmm0 ; 0
|
punpckhbw xmm1, xmm1 ; axbxcxdx
|
||||||
movdqa xmm5, xmm1 ; abcdefgh
|
|
||||||
|
|
||||||
punpcklbw xmm0, xmm1 ; e0f0g0h0
|
|
||||||
psraw xmm0, 11 ; sign extended shift right by 3
|
psraw xmm0, 11 ; sign extended shift right by 3
|
||||||
|
|
||||||
pxor xmm1, xmm1 ; 0
|
|
||||||
punpckhbw xmm1, xmm5 ; a0b0c0d0
|
|
||||||
|
|
||||||
psraw xmm1, 11 ; sign extended shift right by 3
|
psraw xmm1, 11 ; sign extended shift right by 3
|
||||||
movdqa xmm5, xmm0 ; save results
|
movdqa xmm5, xmm0 ; save results
|
||||||
|
|
||||||
@ -1308,28 +1295,22 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
|
|||||||
movdqa xmm5, xmm2
|
movdqa xmm5, xmm2
|
||||||
paddsb xmm5, [t3 GLOBAL]
|
paddsb xmm5, [t3 GLOBAL]
|
||||||
|
|
||||||
pxor xmm0, xmm0 ; 0
|
punpckhbw xmm7, xmm5 ; axbxcxdx
|
||||||
pxor xmm7, xmm7 ; 0
|
punpcklbw xmm5, xmm5 ; exfxgxhx
|
||||||
|
|
||||||
punpcklbw xmm0, xmm5 ; e0f0g0h0
|
|
||||||
psraw xmm0, 11 ; sign extended shift right by 3
|
|
||||||
|
|
||||||
punpckhbw xmm7, xmm5 ; a0b0c0d0
|
|
||||||
psraw xmm7, 11 ; sign extended shift right by 3
|
psraw xmm7, 11 ; sign extended shift right by 3
|
||||||
|
psraw xmm5, 11 ; sign extended shift right by 3
|
||||||
|
|
||||||
packsswb xmm0, xmm7 ; Filter2 >>=3;
|
packsswb xmm5, xmm7 ; Filter2 >>=3;
|
||||||
movdqa xmm5, xmm0 ; Filter2
|
|
||||||
|
|
||||||
paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
|
paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4)
|
||||||
pxor xmm0, xmm0 ; 0
|
|
||||||
|
|
||||||
pxor xmm7, xmm7 ; 0
|
punpcklbw xmm0, xmm2 ; exfxgxhx
|
||||||
punpcklbw xmm0, xmm2 ; e0f0g0h0
|
punpckhbw xmm7, xmm2 ; axbxcxdx
|
||||||
|
|
||||||
psraw xmm0, 11 ; sign extended shift right by 3
|
psraw xmm0, 11 ; sign extended shift right by 3
|
||||||
punpckhbw xmm7, xmm2 ; a0b0c0d0
|
|
||||||
|
|
||||||
psraw xmm7, 11 ; sign extended shift right by 3
|
psraw xmm7, 11 ; sign extended shift right by 3
|
||||||
|
|
||||||
packsswb xmm0, xmm7 ; Filter2 >>=3;
|
packsswb xmm0, xmm7 ; Filter2 >>=3;
|
||||||
|
|
||||||
psubsb xmm3, xmm0 ; qs0 =qs0 - filter1
|
psubsb xmm3, xmm0 ; qs0 =qs0 - filter1
|
||||||
@ -1344,7 +1325,6 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
|
|||||||
; *oq0 = s^0x80;
|
; *oq0 = s^0x80;
|
||||||
; s = vp8_signed_char_clamp(ps0 + u);
|
; s = vp8_signed_char_clamp(ps0 + u);
|
||||||
; *op0 = s^0x80;
|
; *op0 = s^0x80;
|
||||||
pxor xmm0, xmm0
|
|
||||||
pxor xmm1, xmm1
|
pxor xmm1, xmm1
|
||||||
|
|
||||||
pxor xmm2, xmm2
|
pxor xmm2, xmm2
|
||||||
|
Loading…
Reference in New Issue
Block a user