ac3dsp: do not use pshufb in ac3_extract_exponents_ssse3()
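We need to do unsigned saturation in order to cover the corner case when the absolute coefficient value is 16777215 (the maximum value). For that input the per-dword result is -1 due to float inaccuracy; extracting the low byte of each dword with pshufb passes that value through unclipped, whereas packuswb clips it to 0, which is the correct exponent. Fixes bug #216.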
This commit is contained in:
parent
e6d9fa66f1
commit
d483bb58c3
@ -35,7 +35,6 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
|
|||||||
; used in ff_ac3_extract_exponents()
|
; used in ff_ac3_extract_exponents()
|
||||||
pd_1: times 4 dd 1
|
pd_1: times 4 dd 1
|
||||||
pd_151: times 4 dd 151
|
pd_151: times 4 dd 151
|
||||||
pb_shuf_4dwb: db 0, 4, 8, 12
|
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
@ -404,15 +403,12 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
%macro AC3_EXTRACT_EXPONENTS 1
|
%macro AC3_EXTRACT_EXPONENTS 1
|
||||||
cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
|
cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len
|
||||||
add expq, lenq
|
add expq, lenq
|
||||||
lea coefq, [coefq+4*lenq]
|
lea coefq, [coefq+4*lenq]
|
||||||
neg lenq
|
neg lenq
|
||||||
mova m2, [pd_1]
|
mova m2, [pd_1]
|
||||||
mova m3, [pd_151]
|
mova m3, [pd_151]
|
||||||
%ifidn %1, ssse3 ;
|
|
||||||
movd m4, [pb_shuf_4dwb]
|
|
||||||
%endif
|
|
||||||
.loop:
|
.loop:
|
||||||
; move 4 32-bit coefs to xmm0
|
; move 4 32-bit coefs to xmm0
|
||||||
mova m0, [coefq+4*lenq]
|
mova m0, [coefq+4*lenq]
|
||||||
@ -426,12 +422,11 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
|
|||||||
mova m0, m3
|
mova m0, m3
|
||||||
psubd m0, m1
|
psubd m0, m1
|
||||||
; move the lowest byte in each of 4 dwords to the low dword
|
; move the lowest byte in each of 4 dwords to the low dword
|
||||||
%ifidn %1, ssse3
|
; NOTE: We cannot just extract the low bytes with pshufb because the dword
|
||||||
pshufb m0, m4
|
; result for 16777215 is -1 due to float inaccuracy. Using packuswb
|
||||||
%else
|
; clips this to 0, which is the correct exponent.
|
||||||
packssdw m0, m0
|
packssdw m0, m0
|
||||||
packuswb m0, m0
|
packuswb m0, m0
|
||||||
%endif
|
|
||||||
movd [expq+lenq], m0
|
movd [expq+lenq], m0
|
||||||
|
|
||||||
add lenq, 4
|
add lenq, 4
|
||||||
|
Loading…
x
Reference in New Issue
Block a user