Add Neon intrinsic vp9_fdct8x8_quant_neon

On Nexus 7 speed -5 got ~2%, -6 got ~15%, -7 and -8 got ~30%
increase in perf.

Tested on Nexus 7, built with ndk r10d, gcc 4.9.

Change-Id: I83246d63b96674d170098a572fa4fe28a05aaf51
This commit is contained in:
Frank Galligan 2015-01-15 19:29:46 -08:00
parent 643c75d90b
commit 9f6eba419a
2 changed files with 19 additions and 1 deletions

View File

@ -1166,7 +1166,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_fdct8x8_quant sse2 ssse3/;
specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
}
#

View File

@ -32,6 +32,24 @@ void vp9_fdct8x8_1_neon(const int16_t *input, int16_t *output, int stride) {
}
}
void vp9_fdct8x8_quant_neon(const int16_t *input, int stride,
int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
const int16_t* quant_shift_ptr,
int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr,
const int16_t* dequant_ptr, uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
int16_t temp_buffer[64];
(void)coeff_ptr;
vp9_fdct8x8_neon(input, temp_buffer, stride);
vp9_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
dequant_ptr, eob_ptr, scan_ptr, iscan_ptr);
}
void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
int i;
// stage 1