Add NEON intrinsics version of vp9_fdct8x8_quant (vp9_fdct8x8_quant_neon)
On a Nexus 7, performance increased by ~2% at speed -5, ~15% at speed -6, and ~30% at speeds -7 and -8. Tested on a Nexus 7; built with NDK r10d and GCC 4.9. Change-Id: I83246d63b96674d170098a572fa4fe28a05aaf51
This commit is contained in:
parent
643c75d90b
commit
9f6eba419a
@ -1166,7 +1166,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
|
specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||||
specialize qw/vp9_fdct8x8_quant sse2 ssse3/;
|
specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
|
||||||
}
|
}
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -32,6 +32,24 @@ void vp9_fdct8x8_1_neon(const int16_t *input, int16_t *output, int stride) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Combined forward 8x8 DCT + quantization.
//
// Runs vp9_fdct8x8_neon() on `input` (with the given `stride`) into a local
// 64-entry scratch buffer, then hands that buffer to vp9_quantize_fp_neon()
// together with all quantizer arguments, which are forwarded unchanged.
//
// `coeff_ptr` is accepted for signature compatibility but is never read or
// written here: the transform output stays in the local scratch buffer.
void vp9_fdct8x8_quant_neon(const int16_t *input, int stride,
                            int16_t *coeff_ptr, intptr_t n_coeffs,
                            int skip_block, const int16_t *zbin_ptr,
                            const int16_t *round_ptr, const int16_t *quant_ptr,
                            const int16_t *quant_shift_ptr,
                            int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
                            const int16_t *dequant_ptr, uint16_t *eob_ptr,
                            const int16_t *scan_ptr,
                            const int16_t *iscan_ptr) {
  // Scratch storage for the transform output consumed by the quantizer.
  int16_t dct_out[64];

  // Intentionally unused; silences unused-parameter warnings.
  (void)coeff_ptr;

  // Step 1: forward transform into the scratch buffer.
  vp9_fdct8x8_neon(input, dct_out, stride);

  // Step 2: quantize the transformed coefficients.
  vp9_quantize_fp_neon(dct_out, n_coeffs, skip_block,
                       zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                       qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
                       eob_ptr, scan_ptr, iscan_ptr);
}
|
||||||
|
|
||||||
void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
|
void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
|
||||||
int i;
|
int i;
|
||||||
// stage 1
|
// stage 1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user