Merge "Fix overflow issue in SSSE3 32x32 quantization"
This commit is contained in:
@@ -701,7 +701,7 @@ prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_b
|
||||
specialize vp9_quantize_b $ssse3_x86_64
|
||||
|
||||
prototype void vp9_quantize_b_32x32 "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
|
||||
specialize vp9_quantize_b_32x32
|
||||
specialize vp9_quantize_b_32x32 $ssse3_x86_64
|
||||
|
||||
#
|
||||
# Structural Similarity (SSIM)
|
||||
|
||||
@@ -84,7 +84,6 @@ void vp9_quantize_b_c(int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
|
||||
*eob_ptr = eob + 1;
|
||||
}
|
||||
|
||||
// This function works well for large transform sizes.
|
||||
void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
int16_t *zbin_ptr, int16_t *round_ptr,
|
||||
@@ -105,8 +104,8 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
|
||||
eob = -1;
|
||||
|
||||
// Base ZBIN
|
||||
zbins[0] = zbin_ptr[0] + zbin_oq_value;
|
||||
zbins[1] = zbin_ptr[1] + zbin_oq_value;
|
||||
zbins[0] = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1);
|
||||
zbins[1] = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1);
|
||||
nzbins[0] = zbins[0] * -1;
|
||||
nzbins[1] = zbins[1] * -1;
|
||||
|
||||
@@ -114,7 +113,7 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
|
||||
// Pre-scan pass
|
||||
for (i = 0; i < n_coeffs; i++) {
|
||||
rc = scan[i];
|
||||
z = coeff_ptr[rc] * 2;
|
||||
z = coeff_ptr[rc];
|
||||
|
||||
// If the coefficient is out of the base ZBIN range, keep it for
|
||||
// quantization.
|
||||
@@ -130,14 +129,14 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
|
||||
// Calculate ZBIN
|
||||
zbin = (zbins[rc != 0]);
|
||||
|
||||
z = coeff_ptr[rc] * 2;
|
||||
z = coeff_ptr[rc];
|
||||
sz = (z >> 31); // sign of z
|
||||
x = (z ^ sz) - sz; // x = abs(z)
|
||||
|
||||
if (x >= zbin) {
|
||||
x += (round_ptr[rc != 0]);
|
||||
x += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
|
||||
y = (((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) *
|
||||
quant_shift_ptr[rc != 0]) >> 16; // quantize (x)
|
||||
quant_shift_ptr[rc != 0]) >> 15; // quantize (x)
|
||||
|
||||
x = (y ^ sz) - sz; // get the sign back
|
||||
qcoeff_ptr[rc] = x; // write to destination
|
||||
|
||||
@@ -36,6 +36,14 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||
pshufd m4, m4, 0
|
||||
mova m2, [quantq] ; m2 = quant
|
||||
paddw m0, m4 ; m0 = zbin + zbin_oq
|
||||
%ifidn %1, b_32x32
|
||||
pcmpeqw m5, m5
|
||||
psrlw m5, 15
|
||||
paddw m0, m5
|
||||
paddw m1, m5
|
||||
psrlw m0, 1 ; m0 = (m0 + 1) / 2
|
||||
psrlw m1, 1 ; m1 = (m1 + 1) / 2
|
||||
%endif
|
||||
mova m3, [r2q] ; m3 = dequant
|
||||
psubw m0, [pw_1]
|
||||
mov r2, shiftmp
|
||||
@@ -43,6 +51,9 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||
mova m4, [r2] ; m4 = shift
|
||||
mov r4, dqcoeffmp
|
||||
mov r5, iscanmp
|
||||
%ifidn %1, b_32x32
|
||||
psllw m4, 1
|
||||
%endif
|
||||
pxor m5, m5 ; m5 = dedicated zero
|
||||
DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
|
||||
lea coeffq, [ coeffq+ncoeffq*2]
|
||||
@@ -56,10 +67,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
|
||||
pabsw m6, m9 ; m6 = abs(m9)
|
||||
pabsw m11, m10 ; m11 = abs(m10)
|
||||
%ifidn %1, b_32x32
|
||||
paddw m6, m6
|
||||
paddw m11, m11
|
||||
%endif
|
||||
pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
|
||||
punpckhqdq m0, m0
|
||||
pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
|
||||
@@ -112,10 +119,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
|
||||
pabsw m6, m9 ; m6 = abs(m9)
|
||||
pabsw m11, m10 ; m11 = abs(m10)
|
||||
%ifidn %1, b_32x32
|
||||
paddw m6, m6
|
||||
paddw m11, m11
|
||||
%endif
|
||||
pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
|
||||
pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
|
||||
%ifidn %1, b_32x32
|
||||
@@ -164,6 +167,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||
pmaxsw m8, m13
|
||||
add ncoeffq, mmsize
|
||||
jl .ac_only_loop
|
||||
|
||||
%ifidn %1, b_32x32
|
||||
jmp .accumulate_eob
|
||||
.skip_iter:
|
||||
|
||||
Reference in New Issue
Block a user