Fix ssse3 quantize_fp functions when skip=1
In the ssse3 functions, the DEFINE_ARGS macro hard-codes qcoeff and dqcoeff to r3 and r4. If skip is 1, qcoeff and dqcoeff need to be loaded from the stack, which doesn't work because of the above definitions. Currently, the skip=1 case is not used in the encoder. This patch fixes the issue so that it can be turned on later. Change-Id: I998d696b1a7a85dca2b3bcee790b21c21e039147
This commit is contained in:
parent
44adb8e283
commit
58e0159c80
@ -15,6 +15,7 @@ pw_1: times 8 dw 1
|
|||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
|
; TODO(yunqingwang): fix quantize_b code for the skip=1 case.
|
||||||
%macro QUANTIZE_FN 2
|
%macro QUANTIZE_FN 2
|
||||||
cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||||
shift, qcoeff, dqcoeff, dequant, \
|
shift, qcoeff, dqcoeff, dequant, \
|
||||||
@ -244,11 +245,11 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
psllw m2, 1
|
psllw m2, 1
|
||||||
%endif
|
%endif
|
||||||
pxor m5, m5 ; m5 = dedicated zero
|
pxor m5, m5 ; m5 = dedicated zero
|
||||||
DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
|
|
||||||
lea coeffq, [ coeffq+ncoeffq*2]
|
lea coeffq, [ coeffq+ncoeffq*2]
|
||||||
lea iscanq, [ iscanq+ncoeffq*2]
|
lea r5q, [ r5q+ncoeffq*2]
|
||||||
lea qcoeffq, [ qcoeffq+ncoeffq*2]
|
lea r3q, [ r3q+ncoeffq*2]
|
||||||
lea dqcoeffq, [dqcoeffq+ncoeffq*2]
|
lea r4q, [r4q+ncoeffq*2]
|
||||||
neg ncoeffq
|
neg ncoeffq
|
||||||
|
|
||||||
; get DC and first 15 AC coeffs
|
; get DC and first 15 AC coeffs
|
||||||
@ -266,15 +267,15 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
pmulhw m13, m11, m2 ; m13 = m11*q>>16
|
pmulhw m13, m11, m2 ; m13 = m11*q>>16
|
||||||
psignw m8, m9 ; m8 = reinsert sign
|
psignw m8, m9 ; m8 = reinsert sign
|
||||||
psignw m13, m10 ; m13 = reinsert sign
|
psignw m13, m10 ; m13 = reinsert sign
|
||||||
mova [qcoeffq+ncoeffq*2+ 0], m8
|
mova [r3q+ncoeffq*2+ 0], m8
|
||||||
mova [qcoeffq+ncoeffq*2+16], m13
|
mova [r3q+ncoeffq*2+16], m13
|
||||||
%ifidn %1, fp_32x32
|
%ifidn %1, fp_32x32
|
||||||
pabsw m8, m8
|
pabsw m8, m8
|
||||||
pabsw m13, m13
|
pabsw m13, m13
|
||||||
%endif
|
%endif
|
||||||
pmullw m8, m3 ; dqc[i] = qc[i] * q
|
pmullw m8, m3 ; r4[i] = r3[i] * q
|
||||||
punpckhqdq m3, m3
|
punpckhqdq m3, m3
|
||||||
pmullw m13, m3 ; dqc[i] = qc[i] * q
|
pmullw m13, m3 ; r4[i] = r3[i] * q
|
||||||
%ifidn %1, fp_32x32
|
%ifidn %1, fp_32x32
|
||||||
psrlw m8, 1
|
psrlw m8, 1
|
||||||
psrlw m13, 1
|
psrlw m13, 1
|
||||||
@ -282,12 +283,12 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
psignw m13, m10
|
psignw m13, m10
|
||||||
psrlw m0, m3, 2
|
psrlw m0, m3, 2
|
||||||
%endif
|
%endif
|
||||||
mova [dqcoeffq+ncoeffq*2+ 0], m8
|
mova [r4q+ncoeffq*2+ 0], m8
|
||||||
mova [dqcoeffq+ncoeffq*2+16], m13
|
mova [r4q+ncoeffq*2+16], m13
|
||||||
pcmpeqw m8, m5 ; m8 = c[i] == 0
|
pcmpeqw m8, m5 ; m8 = c[i] == 0
|
||||||
pcmpeqw m13, m5 ; m13 = c[i] == 0
|
pcmpeqw m13, m5 ; m13 = c[i] == 0
|
||||||
mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
|
mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i]
|
||||||
mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
|
mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i]
|
||||||
psubw m6, m7 ; m6 = scan[i] + 1
|
psubw m6, m7 ; m6 = scan[i] + 1
|
||||||
psubw m11, m7 ; m11 = scan[i] + 1
|
psubw m11, m7 ; m11 = scan[i] + 1
|
||||||
pandn m8, m6 ; m8 = max(eob)
|
pandn m8, m6 ; m8 = max(eob)
|
||||||
@ -318,26 +319,26 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
pmulhw m13, m11, m2 ; m13 = m11*q>>16
|
pmulhw m13, m11, m2 ; m13 = m11*q>>16
|
||||||
psignw m14, m9 ; m14 = reinsert sign
|
psignw m14, m9 ; m14 = reinsert sign
|
||||||
psignw m13, m10 ; m13 = reinsert sign
|
psignw m13, m10 ; m13 = reinsert sign
|
||||||
mova [qcoeffq+ncoeffq*2+ 0], m14
|
mova [r3q+ncoeffq*2+ 0], m14
|
||||||
mova [qcoeffq+ncoeffq*2+16], m13
|
mova [r3q+ncoeffq*2+16], m13
|
||||||
%ifidn %1, fp_32x32
|
%ifidn %1, fp_32x32
|
||||||
pabsw m14, m14
|
pabsw m14, m14
|
||||||
pabsw m13, m13
|
pabsw m13, m13
|
||||||
%endif
|
%endif
|
||||||
pmullw m14, m3 ; dqc[i] = qc[i] * q
|
pmullw m14, m3 ; r4[i] = r3[i] * q
|
||||||
pmullw m13, m3 ; dqc[i] = qc[i] * q
|
pmullw m13, m3 ; r4[i] = r3[i] * q
|
||||||
%ifidn %1, fp_32x32
|
%ifidn %1, fp_32x32
|
||||||
psrlw m14, 1
|
psrlw m14, 1
|
||||||
psrlw m13, 1
|
psrlw m13, 1
|
||||||
psignw m14, m9
|
psignw m14, m9
|
||||||
psignw m13, m10
|
psignw m13, m10
|
||||||
%endif
|
%endif
|
||||||
mova [dqcoeffq+ncoeffq*2+ 0], m14
|
mova [r4q+ncoeffq*2+ 0], m14
|
||||||
mova [dqcoeffq+ncoeffq*2+16], m13
|
mova [r4q+ncoeffq*2+16], m13
|
||||||
pcmpeqw m14, m5 ; m14 = c[i] == 0
|
pcmpeqw m14, m5 ; m14 = c[i] == 0
|
||||||
pcmpeqw m13, m5 ; m13 = c[i] == 0
|
pcmpeqw m13, m5 ; m13 = c[i] == 0
|
||||||
mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
|
mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i]
|
||||||
mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
|
mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i]
|
||||||
psubw m6, m7 ; m6 = scan[i] + 1
|
psubw m6, m7 ; m6 = scan[i] + 1
|
||||||
psubw m11, m7 ; m11 = scan[i] + 1
|
psubw m11, m7 ; m11 = scan[i] + 1
|
||||||
pandn m14, m6 ; m14 = max(eob)
|
pandn m14, m6 ; m14 = max(eob)
|
||||||
@ -350,10 +351,10 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
%ifidn %1, fp_32x32
|
%ifidn %1, fp_32x32
|
||||||
jmp .accumulate_eob
|
jmp .accumulate_eob
|
||||||
.skip_iter:
|
.skip_iter:
|
||||||
mova [qcoeffq+ncoeffq*2+ 0], m5
|
mova [r3q+ncoeffq*2+ 0], m5
|
||||||
mova [qcoeffq+ncoeffq*2+16], m5
|
mova [r3q+ncoeffq*2+16], m5
|
||||||
mova [dqcoeffq+ncoeffq*2+ 0], m5
|
mova [r4q+ncoeffq*2+ 0], m5
|
||||||
mova [dqcoeffq+ncoeffq*2+16], m5
|
mova [r4q+ncoeffq*2+16], m5
|
||||||
add ncoeffq, mmsize
|
add ncoeffq, mmsize
|
||||||
jl .ac_only_loop
|
jl .ac_only_loop
|
||||||
%endif
|
%endif
|
||||||
@ -368,7 +369,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
pshuflw m7, m8, 0x1
|
pshuflw m7, m8, 0x1
|
||||||
pmaxsw m8, m7
|
pmaxsw m8, m7
|
||||||
pextrw r6, m8, 0
|
pextrw r6, m8, 0
|
||||||
mov [r2], r6
|
mov [r2], r6
|
||||||
RET
|
RET
|
||||||
|
|
||||||
; skip-block, i.e. just write all zeroes
|
; skip-block, i.e. just write all zeroes
|
||||||
@ -377,19 +378,19 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
|||||||
movifnidn ncoeffq, ncoeffmp
|
movifnidn ncoeffq, ncoeffmp
|
||||||
mov r2, qcoeffmp
|
mov r2, qcoeffmp
|
||||||
mov r3, eobmp
|
mov r3, eobmp
|
||||||
DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
|
|
||||||
lea dqcoeffq, [dqcoeffq+ncoeffq*2]
|
lea r0q, [r0q+ncoeffq*2]
|
||||||
lea qcoeffq, [ qcoeffq+ncoeffq*2]
|
lea r2q, [r2q+ncoeffq*2]
|
||||||
neg ncoeffq
|
neg ncoeffq
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
.blank_loop:
|
.blank_loop:
|
||||||
mova [dqcoeffq+ncoeffq*2+ 0], m7
|
mova [r0q+ncoeffq*2+ 0], m7
|
||||||
mova [dqcoeffq+ncoeffq*2+16], m7
|
mova [r0q+ncoeffq*2+16], m7
|
||||||
mova [qcoeffq+ncoeffq*2+ 0], m7
|
mova [r2q+ncoeffq*2+ 0], m7
|
||||||
mova [qcoeffq+ncoeffq*2+16], m7
|
mova [r2q+ncoeffq*2+16], m7
|
||||||
add ncoeffq, mmsize
|
add ncoeffq, mmsize
|
||||||
jl .blank_loop
|
jl .blank_loop
|
||||||
mov word [eobq], 0
|
mov word [r3q], 0
|
||||||
RET
|
RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user