Merge "Re-design quantization process for 32x32 transform block"
This commit is contained in:
commit
f6bf614b2f
@ -717,6 +717,9 @@ specialize qw/vp9_subtract_block/, "$sse2_x86inc";
|
||||
add_proto qw/void vp9_quantize_fp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_quantize_fp/, "$ssse3_x86_64";
|
||||
|
||||
add_proto qw/void vp9_quantize_fp_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64";
|
||||
|
||||
add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
|
||||
|
||||
|
@ -320,10 +320,10 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan_order->scan,
|
||||
scan_order->iscan);
|
||||
vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
|
||||
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan_order->scan,
|
||||
scan_order->iscan);
|
||||
break;
|
||||
case TX_16X16:
|
||||
vp9_fdct16x16(src_diff, coeff, diff_stride);
|
||||
|
@ -104,6 +104,49 @@ void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
|
||||
*eob_ptr = eob + 1;
|
||||
}
|
||||
|
||||
// TODO(jingning) Refactor this file and combine functions with similar
|
||||
// operations.
|
||||
// Reference C quantizer for the "fp" path on 32x32 transform blocks.
//
// Visits the first n_coeffs positions of `scan`; for each position rc it
// reads coeff_ptr[rc] and writes a quantized value to qcoeff_ptr[rc] and a
// dequantized value to dqcoeff_ptr[rc].
//
// Parameters used by the body:
//   coeff_ptr    input coefficients, indexed by scan[i].
//   n_coeffs     number of scan positions to visit and number of int16_t
//                entries cleared in each output buffer.
//   skip_block   when non-zero the loop is skipped entirely; outputs stay
//                cleared and *eob_ptr becomes 0.
//   round_ptr, quant_ptr, dequant_ptr
//                two-entry tables indexed with (rc != 0): entry 0 is used
//                for rc == 0, entry 1 for every other position
//                (presumably DC vs. AC -- confirm against the caller).
//   qcoeff_ptr, dqcoeff_ptr
//                output buffers.
//   eob_ptr      receives (last scan index i whose quantized magnitude was
//                non-zero) + 1, or 0 if there was none.
//   scan         maps scan index i to coefficient position rc.
//
// zbin_ptr, quant_shift_ptr, zbin_oq_value and iscan are accepted but never
// read; see the (void) casts below.
void vp9_quantize_fp_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr, const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan) {
|
||||
// eob starts at -1 so that "no non-zero output" yields *eob_ptr == 0.
int i, eob = -1;
|
||||
// Unused parameters, explicitly discarded.
(void)zbin_ptr;
|
||||
(void)quant_shift_ptr;
|
||||
(void)zbin_oq_value;
|
||||
(void)iscan;
|
||||
|
||||
// Clear both outputs up front (vpx_memset is defined elsewhere; presumably
// a memset wrapper). Positions that fail the threshold below are never
// written again, so they keep this zero.
vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
|
||||
vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
|
||||
|
||||
if (!skip_block) {
|
||||
for (i = 0; i < n_coeffs; i++) {
|
||||
const int rc = scan[i];
|
||||
const int coeff = coeff_ptr[rc];
|
||||
// Sign mask: -1 for negative coeff, 0 otherwise.
// NOTE(review): relies on 32-bit int and arithmetic right shift of a
// negative value, which is implementation-defined in C.
const int coeff_sign = (coeff >> 31);
|
||||
// tmp holds the quantized magnitude; it stays 0 when the threshold test
// below fails, so such positions never advance eob.
int tmp = 0;
|
||||
// Branch-free absolute value using the sign mask.
int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
|
||||
|
||||
// Only quantize magnitudes of at least a quarter of the dequant step.
if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
|
||||
// ROUND_POWER_OF_TWO is defined elsewhere; presumably this adds the
// rounding offset halved with round-to-nearest -- TODO confirm.
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
|
||||
// clamp is defined elsewhere; presumably limits the value to the int16_t
// range before the multiply.
abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
|
||||
tmp = (abs_coeff * quant_ptr[rc != 0]) >> 15;
|
||||
// Re-apply the original sign with the same xor/subtract trick.
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
|
||||
// Dequantized value is halved (integer division) relative to
// qcoeff * dequant.
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
|
||||
}
|
||||
|
||||
// Track the last scan index that produced a non-zero quantized magnitude.
if (tmp)
|
||||
eob = i;
|
||||
}
|
||||
}
|
||||
*eob_ptr = eob + 1;
|
||||
}
|
||||
|
||||
void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr, const int16_t *round_ptr,
|
||||
|
@ -253,6 +253,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
|
||||
}
|
||||
|
||||
if (speed >= 5) {
|
||||
sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
|
||||
sf->auto_min_max_partition_size = (cm->frame_type == KEY_FRAME) ?
|
||||
RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
|
||||
sf->max_partition_size = BLOCK_32X32;
|
||||
@ -287,7 +288,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
|
||||
sf->mv.reduce_first_step_size = 1;
|
||||
}
|
||||
if (speed >= 7) {
|
||||
sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
|
||||
sf->mv.fullpel_search_step_param = 10;
|
||||
sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
|
||||
sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ?
|
||||
|
@ -234,21 +234,18 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||
movifnidn quantq, quantmp
|
||||
mova m1, [roundq] ; m1 = round
|
||||
mova m2, [quantq] ; m2 = quant
|
||||
%ifidn %1, b_32x32
|
||||
; TODO(jingning) to be continued with 32x32 quantization process
|
||||
%ifidn %1, fp_32x32
|
||||
pcmpeqw m5, m5
|
||||
psrlw m5, 15
|
||||
paddw m0, m5
|
||||
paddw m1, m5
|
||||
psrlw m0, 1 ; m0 = (m0 + 1) / 2
|
||||
psrlw m1, 1 ; m1 = (m1 + 1) / 2
|
||||
%endif
|
||||
mova m3, [r2q] ; m3 = dequant
|
||||
mov r3, qcoeffmp
|
||||
mov r4, dqcoeffmp
|
||||
mov r5, iscanmp
|
||||
%ifidn %1, b_32x32
|
||||
psllw m4, 1
|
||||
%ifidn %1, fp_32x32
|
||||
psllw m2, 1
|
||||
%endif
|
||||
pxor m5, m5 ; m5 = dedicated zero
|
||||
DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
|
||||
@ -275,18 +272,19 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||
psignw m13, m10 ; m13 = reinsert sign
|
||||
mova [qcoeffq+ncoeffq*2+ 0], m8
|
||||
mova [qcoeffq+ncoeffq*2+16], m13
|
||||
%ifidn %1, b_32x32
|
||||
%ifidn %1, fp_32x32
|
||||
pabsw m8, m8
|
||||
pabsw m13, m13
|
||||
%endif
|
||||
pmullw m8, m3 ; dqc[i] = qc[i] * q
|
||||
punpckhqdq m3, m3
|
||||
pmullw m13, m3 ; dqc[i] = qc[i] * q
|
||||
%ifidn %1, b_32x32
|
||||
%ifidn %1, fp_32x32
|
||||
psrlw m8, 1
|
||||
psrlw m13, 1
|
||||
psignw m8, m9
|
||||
psignw m13, m10
|
||||
psrlw m0, m3, 2
|
||||
%endif
|
||||
mova [dqcoeffq+ncoeffq*2+ 0], m8
|
||||
mova [dqcoeffq+ncoeffq*2+16], m13
|
||||
@ -307,13 +305,17 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
|
||||
pabsw m6, m9 ; m6 = abs(m9)
|
||||
pabsw m11, m10 ; m11 = abs(m10)
|
||||
pcmpeqw m7, m7
|
||||
%ifidn %1, b_32x32
|
||||
%ifidn %1, fp_32x32
|
||||
pcmpgtw m7, m6, m0
|
||||
pcmpgtw m12, m11, m0
|
||||
pmovmskb r6, m7
|
||||
pmovmskb r2, m7
|
||||
pmovmskb r2, m12
|
||||
|
||||
or r6, r2
|
||||
jz .skip_iter
|
||||
%endif
|
||||
pcmpeqw m7, m7
|
||||
|
||||
paddsw m6, m1 ; m6 += round
|
||||
paddsw m11, m1 ; m11 += round
|
||||
pmulhw m14, m6, m2 ; m14 = m6*q>>16
|
||||
@ -322,13 +324,13 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||
psignw m13, m10 ; m13 = reinsert sign
|
||||
mova [qcoeffq+ncoeffq*2+ 0], m14
|
||||
mova [qcoeffq+ncoeffq*2+16], m13
|
||||
%ifidn %1, b_32x32
|
||||
%ifidn %1, fp_32x32
|
||||
pabsw m14, m14
|
||||
pabsw m13, m13
|
||||
%endif
|
||||
pmullw m14, m3 ; dqc[i] = qc[i] * q
|
||||
pmullw m13, m3 ; dqc[i] = qc[i] * q
|
||||
%ifidn %1, b_32x32
|
||||
%ifidn %1, fp_32x32
|
||||
psrlw m14, 1
|
||||
psrlw m13, 1
|
||||
psignw m14, m9
|
||||
@ -349,7 +351,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||
add ncoeffq, mmsize
|
||||
jl .ac_only_loop
|
||||
|
||||
%ifidn %1, b_32x32
|
||||
%ifidn %1, fp_32x32
|
||||
jmp .accumulate_eob
|
||||
.skip_iter:
|
||||
mova [qcoeffq+ncoeffq*2+ 0], m5
|
||||
@ -397,3 +399,4 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
||||
|
||||
INIT_XMM ssse3
|
||||
QUANTIZE_FP fp, 7
|
||||
QUANTIZE_FP fp_32x32, 7
|
||||
|
Loading…
Reference in New Issue
Block a user