neon fast quantize block pair

vp8_fast_quantize_b_pair_neon function added to quantize
two adjacent blocks at the same time to improve performance.
 - Additional 3-6% speedup compared to neon optimized fast
   quantizer (Tanya VGA@30fps, 1Mbps stream, cpu-used=-5..-16)

Change-Id: I3fcbf141e5d05e9118c38ca37310458afbabaa4e
This commit is contained in:
Tero Rintaluoma
2011-05-09 10:09:41 +03:00
parent 9e4f76c154
commit 61f0c090df
12 changed files with 289 additions and 19 deletions

View File

@@ -269,7 +269,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
#endif
void vp8_quantize_mby(MACROBLOCK *x)
void vp8_quantize_mby_c(MACROBLOCK *x)
{
int i;
int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
@@ -282,7 +282,7 @@ void vp8_quantize_mby(MACROBLOCK *x)
x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
}
void vp8_quantize_mb(MACROBLOCK *x)
void vp8_quantize_mb_c(MACROBLOCK *x)
{
int i;
int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
@@ -293,7 +293,7 @@ void vp8_quantize_mb(MACROBLOCK *x)
}
void vp8_quantize_mbuv(MACROBLOCK *x)
void vp8_quantize_mbuv_c(MACROBLOCK *x)
{
int i;
@@ -301,6 +301,22 @@ void vp8_quantize_mbuv(MACROBLOCK *x)
x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
}
/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
* these two C functions if corresponding optimized routine is not available.
* NEON optimized version implements currently the fast quantization for pair
* of blocks. */
void vp8_regular_quantize_b_pair(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
{
vp8_regular_quantize_b(b1, d1);
vp8_regular_quantize_b(b2, d2);
}
void vp8_fast_quantize_b_pair_c(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
{
vp8_fast_quantize_b_c(b1, d1);
vp8_fast_quantize_b_c(b2, d2);
}
static const int qrounding_factors[129] =
{
@@ -715,3 +731,4 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
vp8cx_init_quantizer(cpi);
}