Merge "Tune SSSE3 assembly implementation to improve quantization speed"
This commit is contained in:
commit
30e9c091c0
@ -282,6 +282,8 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
psignw m8, m9
|
psignw m8, m9
|
||||||
psignw m13, m10
|
psignw m13, m10
|
||||||
psrlw m0, m3, 2
|
psrlw m0, m3, 2
|
||||||
|
%else
|
||||||
|
psrlw m0, m3, 1
|
||||||
%endif
|
%endif
|
||||||
mova [r4q+ncoeffq*2+ 0], m8
|
mova [r4q+ncoeffq*2+ 0], m8
|
||||||
mova [r4q+ncoeffq*2+16], m13
|
mova [r4q+ncoeffq*2+16], m13
|
||||||
@ -302,7 +304,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
|
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
|
||||||
pabsw m6, m9 ; m6 = abs(m9)
|
pabsw m6, m9 ; m6 = abs(m9)
|
||||||
pabsw m11, m10 ; m11 = abs(m10)
|
pabsw m11, m10 ; m11 = abs(m10)
|
||||||
%ifidn %1, fp_32x32
|
|
||||||
pcmpgtw m7, m6, m0
|
pcmpgtw m7, m6, m0
|
||||||
pcmpgtw m12, m11, m0
|
pcmpgtw m12, m11, m0
|
||||||
pmovmskb r6d, m7
|
pmovmskb r6d, m7
|
||||||
@ -310,7 +312,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
|
|
||||||
or r6, r2
|
or r6, r2
|
||||||
jz .skip_iter
|
jz .skip_iter
|
||||||
%endif
|
|
||||||
pcmpeqw m7, m7
|
pcmpeqw m7, m7
|
||||||
|
|
||||||
paddsw m6, m1 ; m6 += round
|
paddsw m6, m1 ; m6 += round
|
||||||
@ -348,7 +350,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
add ncoeffq, mmsize
|
add ncoeffq, mmsize
|
||||||
jl .ac_only_loop
|
jl .ac_only_loop
|
||||||
|
|
||||||
%ifidn %1, fp_32x32
|
|
||||||
jmp .accumulate_eob
|
jmp .accumulate_eob
|
||||||
.skip_iter:
|
.skip_iter:
|
||||||
mova [r3q+ncoeffq*2+ 0], m5
|
mova [r3q+ncoeffq*2+ 0], m5
|
||||||
@ -357,7 +358,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
mova [r4q+ncoeffq*2+16], m5
|
mova [r4q+ncoeffq*2+16], m5
|
||||||
add ncoeffq, mmsize
|
add ncoeffq, mmsize
|
||||||
jl .ac_only_loop
|
jl .ac_only_loop
|
||||||
%endif
|
|
||||||
|
|
||||||
.accumulate_eob:
|
.accumulate_eob:
|
||||||
; horizontally accumulate/max eobs and write into [eob] memory pointer
|
; horizontally accumulate/max eobs and write into [eob] memory pointer
|
||||||
|
Loading…
x
Reference in New Issue
Block a user