neon fast quantize block pair

The vp8_fast_quantize_b_pair_neon function was added to quantize two adjacent blocks at the same time to improve performance. This gives an additional 3-6% speedup compared to the NEON-optimized fast quantizer (Tanya VGA@30fps, 1Mbps stream, cpu-used=-5..-16). Change-Id: I3fcbf141e5d05e9118c38ca37310458afbabaa4e
2011-05-09 10:09:41 +03:00
parent 9e4f76c154
commit 61f0c090df
12 changed files with 289 additions and 19 deletions
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -269,7 +269,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)

 #endif

-void vp8_quantize_mby(MACROBLOCK *x)
+void vp8_quantize_mby_c(MACROBLOCK *x)
 {
    int i;
    int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
@@ -282,7 +282,7 @@ void vp8_quantize_mby(MACROBLOCK *x)
        x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
 }

-void vp8_quantize_mb(MACROBLOCK *x)
+void vp8_quantize_mb_c(MACROBLOCK *x)
 {
    int i;
    int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
@@ -293,7 +293,7 @@ void vp8_quantize_mb(MACROBLOCK *x)
 }


-void vp8_quantize_mbuv(MACROBLOCK *x)
+void vp8_quantize_mbuv_c(MACROBLOCK *x)
 {
    int i;

@@ -301,6 +301,22 @@ void vp8_quantize_mbuv(MACROBLOCK *x)
        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
 }

+/* The quantize_b_pair function pointer in the MACROBLOCK structure is set to
+ * one of these two C functions if the corresponding optimized routine is not
+ * available. The NEON optimized version currently implements the fast
+ * quantization for a pair of blocks. */
+void vp8_regular_quantize_b_pair(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
+{   /* Pair wrapper: calls vp8_regular_quantize_b once per block, in order. */
+    vp8_regular_quantize_b(b1, d1);  /* first block: b1 together with d1 */
+    vp8_regular_quantize_b(b2, d2);  /* second block: b2 together with d2 */
+}
+
+void vp8_fast_quantize_b_pair_c(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
+{   /* Pair wrapper: calls vp8_fast_quantize_b_c once per block, in order. */
+    vp8_fast_quantize_b_c(b1, d1);  /* first block: b1 together with d1 */
+    vp8_fast_quantize_b_c(b2, d2);  /* second block: b2 together with d2 */
+}
+

 static const int qrounding_factors[129] =
 {
@@ -715,3 +731,4 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q)
        vp8cx_init_quantizer(cpi);

 }
+