Unify loopfilter function names
Rename vpx_lpf_horizontal_edge_8() to vpx_lpf_horizontal_16().
Rename vpx_lpf_horizontal_edge_16() to vpx_lpf_horizontal_16_dual().

Change-Id: I798ca8fbbd657d06d3db2bfb0fb3321168f49e52
This commit is contained in:
		| @@ -402,10 +402,10 @@ INSTANTIATE_TEST_CASE_P( | |||||||
|                                  &vpx_highbd_lpf_vertical_4_c, 8), |                                  &vpx_highbd_lpf_vertical_4_c, 8), | ||||||
|                       make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, |                       make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, | ||||||
|                                  &vpx_highbd_lpf_horizontal_8_c, 8), |                                  &vpx_highbd_lpf_horizontal_8_c, 8), | ||||||
|                       make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2, |                       make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, | ||||||
|                                  &vpx_highbd_lpf_horizontal_edge_8_c, 8), |                                  &vpx_highbd_lpf_horizontal_16_c, 8), | ||||||
|                       make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2, |                       make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2, | ||||||
|                                  &vpx_highbd_lpf_horizontal_edge_16_c, 8), |                                  &vpx_highbd_lpf_horizontal_16_dual_c, 8), | ||||||
|                       make_tuple(&vpx_highbd_lpf_vertical_8_sse2, |                       make_tuple(&vpx_highbd_lpf_vertical_8_sse2, | ||||||
|                                  &vpx_highbd_lpf_vertical_8_c, 8), |                                  &vpx_highbd_lpf_vertical_8_c, 8), | ||||||
|                       make_tuple(&vpx_highbd_lpf_vertical_16_sse2, |                       make_tuple(&vpx_highbd_lpf_vertical_16_sse2, | ||||||
| @@ -416,10 +416,10 @@ INSTANTIATE_TEST_CASE_P( | |||||||
|                                  &vpx_highbd_lpf_vertical_4_c, 10), |                                  &vpx_highbd_lpf_vertical_4_c, 10), | ||||||
|                       make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, |                       make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, | ||||||
|                                  &vpx_highbd_lpf_horizontal_8_c, 10), |                                  &vpx_highbd_lpf_horizontal_8_c, 10), | ||||||
|                       make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2, |                       make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, | ||||||
|                                  &vpx_highbd_lpf_horizontal_edge_8_c, 10), |                                  &vpx_highbd_lpf_horizontal_16_c, 10), | ||||||
|                       make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2, |                       make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2, | ||||||
|                                  &vpx_highbd_lpf_horizontal_edge_16_c, 10), |                                  &vpx_highbd_lpf_horizontal_16_dual_c, 10), | ||||||
|                       make_tuple(&vpx_highbd_lpf_vertical_8_sse2, |                       make_tuple(&vpx_highbd_lpf_vertical_8_sse2, | ||||||
|                                  &vpx_highbd_lpf_vertical_8_c, 10), |                                  &vpx_highbd_lpf_vertical_8_c, 10), | ||||||
|                       make_tuple(&vpx_highbd_lpf_vertical_16_sse2, |                       make_tuple(&vpx_highbd_lpf_vertical_16_sse2, | ||||||
| @@ -430,10 +430,10 @@ INSTANTIATE_TEST_CASE_P( | |||||||
|                                  &vpx_highbd_lpf_vertical_4_c, 12), |                                  &vpx_highbd_lpf_vertical_4_c, 12), | ||||||
|                       make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, |                       make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, | ||||||
|                                  &vpx_highbd_lpf_horizontal_8_c, 12), |                                  &vpx_highbd_lpf_horizontal_8_c, 12), | ||||||
|                       make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2, |                       make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, | ||||||
|                                  &vpx_highbd_lpf_horizontal_edge_8_c, 12), |                                  &vpx_highbd_lpf_horizontal_16_c, 12), | ||||||
|                       make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2, |                       make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2, | ||||||
|                                  &vpx_highbd_lpf_horizontal_edge_16_c, 12), |                                  &vpx_highbd_lpf_horizontal_16_dual_c, 12), | ||||||
|                       make_tuple(&vpx_highbd_lpf_vertical_8_sse2, |                       make_tuple(&vpx_highbd_lpf_vertical_8_sse2, | ||||||
|                                  &vpx_highbd_lpf_vertical_8_c, 12), |                                  &vpx_highbd_lpf_vertical_8_c, 12), | ||||||
|                       make_tuple(&vpx_highbd_lpf_vertical_16_sse2, |                       make_tuple(&vpx_highbd_lpf_vertical_16_sse2, | ||||||
| @@ -450,10 +450,9 @@ INSTANTIATE_TEST_CASE_P( | |||||||
|     ::testing::Values( |     ::testing::Values( | ||||||
|         make_tuple(&vpx_lpf_horizontal_4_sse2, &vpx_lpf_horizontal_4_c, 8), |         make_tuple(&vpx_lpf_horizontal_4_sse2, &vpx_lpf_horizontal_4_c, 8), | ||||||
|         make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8), |         make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8), | ||||||
|         make_tuple(&vpx_lpf_horizontal_edge_8_sse2, |         make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8), | ||||||
|                    &vpx_lpf_horizontal_edge_8_c, 8), |         make_tuple(&vpx_lpf_horizontal_16_dual_sse2, | ||||||
|         make_tuple(&vpx_lpf_horizontal_edge_16_sse2, |                    &vpx_lpf_horizontal_16_dual_c, 8), | ||||||
|                    &vpx_lpf_horizontal_edge_16_c, 8), |  | ||||||
|         make_tuple(&vpx_lpf_vertical_4_sse2, &vpx_lpf_vertical_4_c, 8), |         make_tuple(&vpx_lpf_vertical_4_sse2, &vpx_lpf_vertical_4_c, 8), | ||||||
|         make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8), |         make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8), | ||||||
|         make_tuple(&vpx_lpf_vertical_16_sse2, &vpx_lpf_vertical_16_c, 8), |         make_tuple(&vpx_lpf_vertical_16_sse2, &vpx_lpf_vertical_16_c, 8), | ||||||
| @@ -465,10 +464,10 @@ INSTANTIATE_TEST_CASE_P( | |||||||
| #if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH) | #if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH) | ||||||
| INSTANTIATE_TEST_CASE_P( | INSTANTIATE_TEST_CASE_P( | ||||||
|     AVX2, Loop8Test6Param, |     AVX2, Loop8Test6Param, | ||||||
|     ::testing::Values(make_tuple(&vpx_lpf_horizontal_edge_8_avx2, |     ::testing::Values(make_tuple(&vpx_lpf_horizontal_16_avx2, | ||||||
|                                  &vpx_lpf_horizontal_edge_8_c, 8), |                                  &vpx_lpf_horizontal_16_c, 8), | ||||||
|                       make_tuple(&vpx_lpf_horizontal_edge_16_avx2, |                       make_tuple(&vpx_lpf_horizontal_16_dual_avx2, | ||||||
|                                  &vpx_lpf_horizontal_edge_16_c, 8))); |                                  &vpx_lpf_horizontal_16_dual_c, 8))); | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| #if HAVE_SSE2 | #if HAVE_SSE2 | ||||||
| @@ -520,10 +519,9 @@ INSTANTIATE_TEST_CASE_P( | |||||||
| INSTANTIATE_TEST_CASE_P( | INSTANTIATE_TEST_CASE_P( | ||||||
|     NEON, Loop8Test6Param, |     NEON, Loop8Test6Param, | ||||||
|     ::testing::Values( |     ::testing::Values( | ||||||
|         make_tuple(&vpx_lpf_horizontal_edge_8_neon, |         make_tuple(&vpx_lpf_horizontal_16_neon, &vpx_lpf_horizontal_16_c, 8), | ||||||
|                    &vpx_lpf_horizontal_edge_8_c, 8), |         make_tuple(&vpx_lpf_horizontal_16_dual_neon, | ||||||
|         make_tuple(&vpx_lpf_horizontal_edge_16_neon, |                    &vpx_lpf_horizontal_16_dual_c, 8), | ||||||
|                    &vpx_lpf_horizontal_edge_16_c, 8), |  | ||||||
|         make_tuple(&vpx_lpf_vertical_16_neon, &vpx_lpf_vertical_16_c, 8), |         make_tuple(&vpx_lpf_vertical_16_neon, &vpx_lpf_vertical_16_c, 8), | ||||||
|         make_tuple(&vpx_lpf_vertical_16_dual_neon, &vpx_lpf_vertical_16_dual_c, |         make_tuple(&vpx_lpf_vertical_16_dual_neon, &vpx_lpf_vertical_16_dual_c, | ||||||
|                    8), |                    8), | ||||||
| @@ -550,8 +548,9 @@ INSTANTIATE_TEST_CASE_P( | |||||||
|     ::testing::Values( |     ::testing::Values( | ||||||
|         make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8), |         make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8), | ||||||
|         make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8), |         make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8), | ||||||
|         make_tuple(&vpx_lpf_horizontal_edge_8, &vpx_lpf_horizontal_edge_8, 8), |         make_tuple(&vpx_lpf_horizontal_16_dspr2, &vpx_lpf_horizontal_16_c, 8), | ||||||
|         make_tuple(&vpx_lpf_horizontal_edge_16, &vpx_lpf_horizontal_edge_16, 8), |         make_tuple(&vpx_lpf_horizontal_16_dual_dspr2, | ||||||
|  |                    &vpx_lpf_horizontal_16_dual_c, 8), | ||||||
|         make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8), |         make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8), | ||||||
|         make_tuple(&vpx_lpf_vertical_8_dspr2, &vpx_lpf_vertical_8_c, 8), |         make_tuple(&vpx_lpf_vertical_8_dspr2, &vpx_lpf_vertical_8_c, 8), | ||||||
|         make_tuple(&vpx_lpf_vertical_16_dspr2, &vpx_lpf_vertical_16_c, 8), |         make_tuple(&vpx_lpf_vertical_16_dspr2, &vpx_lpf_vertical_16_c, 8), | ||||||
| @@ -576,10 +575,9 @@ INSTANTIATE_TEST_CASE_P( | |||||||
|     ::testing::Values( |     ::testing::Values( | ||||||
|         make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8), |         make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8), | ||||||
|         make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8), |         make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8), | ||||||
|         make_tuple(&vpx_lpf_horizontal_edge_8_msa, &vpx_lpf_horizontal_edge_8_c, |         make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8), | ||||||
|                    8), |         make_tuple(&vpx_lpf_horizontal_16_dual_msa, | ||||||
|         make_tuple(&vpx_lpf_horizontal_edge_16_msa, |                    &vpx_lpf_horizontal_16_dual_c, 8), | ||||||
|                    &vpx_lpf_horizontal_edge_16_c, 8), |  | ||||||
|         make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8), |         make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8), | ||||||
|         make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8), |         make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8), | ||||||
|         make_tuple(&vpx_lpf_vertical_16_msa, &vpx_lpf_vertical_16_c, 8))); |         make_tuple(&vpx_lpf_vertical_16_msa, &vpx_lpf_vertical_16_c, 8))); | ||||||
|   | |||||||
| @@ -465,12 +465,11 @@ static void filter_selectively_horiz( | |||||||
|  |  | ||||||
|       if (mask_16x16 & 1) { |       if (mask_16x16 & 1) { | ||||||
|         if ((mask_16x16 & 3) == 3) { |         if ((mask_16x16 & 3) == 3) { | ||||||
|           vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, |           vpx_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim, | ||||||
|                                      lfi->hev_thr); |                                      lfi->hev_thr); | ||||||
|           count = 2; |           count = 2; | ||||||
|         } else { |         } else { | ||||||
|           vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, |           vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); | ||||||
|                                     lfi->hev_thr); |  | ||||||
|         } |         } | ||||||
|       } else if (mask_8x8 & 1) { |       } else if (mask_8x8 & 1) { | ||||||
|         if ((mask_8x8 & 3) == 3) { |         if ((mask_8x8 & 3) == 3) { | ||||||
| @@ -559,12 +558,12 @@ static void highbd_filter_selectively_horiz( | |||||||
|  |  | ||||||
|       if (mask_16x16 & 1) { |       if (mask_16x16 & 1) { | ||||||
|         if ((mask_16x16 & 3) == 3) { |         if ((mask_16x16 & 3) == 3) { | ||||||
|           vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, |           vpx_highbd_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim, | ||||||
|                                             lfi->hev_thr, bd); |                                             lfi->hev_thr, bd); | ||||||
|           count = 2; |           count = 2; | ||||||
|         } else { |         } else { | ||||||
|           vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, |           vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, | ||||||
|                                            lfi->hev_thr, bd); |                                        lfi->hev_thr, bd); | ||||||
|         } |         } | ||||||
|       } else if (mask_8x8 & 1) { |       } else if (mask_8x8 & 1) { | ||||||
|         if ((mask_8x8 & 3) == 3) { |         if ((mask_8x8 & 3) == 3) { | ||||||
|   | |||||||
| @@ -8,8 +8,8 @@ | |||||||
| ;  be found in the AUTHORS file in the root of the source tree. | ;  be found in the AUTHORS file in the root of the source tree. | ||||||
| ; | ; | ||||||
|  |  | ||||||
|     EXPORT  |vpx_lpf_horizontal_edge_8_neon| |     EXPORT  |vpx_lpf_horizontal_16_neon| | ||||||
|     EXPORT  |vpx_lpf_horizontal_edge_16_neon| |     EXPORT  |vpx_lpf_horizontal_16_dual_neon| | ||||||
|     EXPORT  |vpx_lpf_vertical_16_neon| |     EXPORT  |vpx_lpf_vertical_16_neon| | ||||||
|     EXPORT  |vpx_lpf_vertical_16_dual_neon| |     EXPORT  |vpx_lpf_vertical_16_dual_neon| | ||||||
|     ARM |     ARM | ||||||
| @@ -119,7 +119,7 @@ h_next | |||||||
|  |  | ||||||
|     ENDP        ; |mb_lpf_horizontal_edge| |     ENDP        ; |mb_lpf_horizontal_edge| | ||||||
|  |  | ||||||
| ; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, | ; void vpx_lpf_horizontal_16_neon(uint8_t *s, int pitch, | ||||||
| ;                                     const uint8_t *blimit, | ;                                     const uint8_t *blimit, | ||||||
| ;                                     const uint8_t *limit, | ;                                     const uint8_t *limit, | ||||||
| ;                                     const uint8_t *thresh) | ;                                     const uint8_t *thresh) | ||||||
| @@ -128,12 +128,12 @@ h_next | |||||||
| ; r2    const uint8_t *blimit, | ; r2    const uint8_t *blimit, | ||||||
| ; r3    const uint8_t *limit, | ; r3    const uint8_t *limit, | ||||||
| ; sp    const uint8_t *thresh | ; sp    const uint8_t *thresh | ||||||
| |vpx_lpf_horizontal_edge_8_neon| PROC | |vpx_lpf_horizontal_16_neon| PROC | ||||||
|     mov r12, #1 |     mov r12, #1 | ||||||
|     b mb_lpf_horizontal_edge |     b mb_lpf_horizontal_edge | ||||||
|     ENDP        ; |vpx_lpf_horizontal_edge_8_neon| |     ENDP        ; |vpx_lpf_horizontal_16_neon| | ||||||
|  |  | ||||||
| ; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, | ; void vpx_lpf_horizontal_16_dual_neon(uint8_t *s, int pitch, | ||||||
| ;                                      const uint8_t *blimit, | ;                                      const uint8_t *blimit, | ||||||
| ;                                      const uint8_t *limit, | ;                                      const uint8_t *limit, | ||||||
| ;                                      const uint8_t *thresh) | ;                                      const uint8_t *thresh) | ||||||
| @@ -142,10 +142,10 @@ h_next | |||||||
| ; r2    const uint8_t *blimit, | ; r2    const uint8_t *blimit, | ||||||
| ; r3    const uint8_t *limit, | ; r3    const uint8_t *limit, | ||||||
| ; sp    const uint8_t *thresh | ; sp    const uint8_t *thresh | ||||||
| |vpx_lpf_horizontal_edge_16_neon| PROC | |vpx_lpf_horizontal_16_dual_neon| PROC | ||||||
|     mov r12, #2 |     mov r12, #2 | ||||||
|     b mb_lpf_horizontal_edge |     b mb_lpf_horizontal_edge | ||||||
|     ENDP        ; |vpx_lpf_horizontal_edge_16_neon| |     ENDP        ; |vpx_lpf_horizontal_16_dual_neon| | ||||||
|  |  | ||||||
| ; void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit, | ; void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit, | ||||||
| ;                             const uint8_t *limit, const uint8_t *thresh, | ;                             const uint8_t *limit, const uint8_t *thresh, | ||||||
|   | |||||||
| @@ -975,9 +975,8 @@ FUN_LPF_16_KERNEL(_, 8)        // lpf_16_kernel | |||||||
| FUN_LPF_16_KERNEL(_dual_, 16)  // lpf_16_dual_kernel | FUN_LPF_16_KERNEL(_dual_, 16)  // lpf_16_dual_kernel | ||||||
| #undef FUN_LPF_16_KERNEL | #undef FUN_LPF_16_KERNEL | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int p, const uint8_t *blimit, | void vpx_lpf_horizontal_16_neon(uint8_t *s, int p, const uint8_t *blimit, | ||||||
|                                     const uint8_t *limit, |                                 const uint8_t *limit, const uint8_t *thresh) { | ||||||
|                                     const uint8_t *thresh) { |  | ||||||
|   uint8x8_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6, |   uint8x8_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6, | ||||||
|       op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6; |       op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6; | ||||||
|   uint32_t flat_status, flat2_status; |   uint32_t flat_status, flat2_status; | ||||||
| @@ -992,7 +991,7 @@ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int p, const uint8_t *blimit, | |||||||
|              oq5, oq6, flat_status, flat2_status); |              oq5, oq6, flat_status, flat2_status); | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int p, const uint8_t *blimit, | void vpx_lpf_horizontal_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit, | ||||||
|                                      const uint8_t *limit, |                                      const uint8_t *limit, | ||||||
|                                      const uint8_t *thresh) { |                                      const uint8_t *thresh) { | ||||||
|   uint8x16_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, |   uint8x16_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, | ||||||
|   | |||||||
| @@ -308,12 +308,12 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit, | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit, | void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit, | ||||||
|                                  const uint8_t *limit, const uint8_t *thresh) { |                              const uint8_t *limit, const uint8_t *thresh) { | ||||||
|   mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1); |   mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1); | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit, | void vpx_lpf_horizontal_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, | ||||||
|                                   const uint8_t *limit, const uint8_t *thresh) { |                                   const uint8_t *limit, const uint8_t *thresh) { | ||||||
|   mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2); |   mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2); | ||||||
| } | } | ||||||
| @@ -673,14 +673,13 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p, | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p, | void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, | ||||||
|                                         const uint8_t *blimit, |                                     const uint8_t *limit, const uint8_t *thresh, | ||||||
|                                         const uint8_t *limit, |                                     int bd) { | ||||||
|                                         const uint8_t *thresh, int bd) { |  | ||||||
|   highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd); |   highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd); | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p, | void vpx_highbd_lpf_horizontal_16_dual_c(uint16_t *s, int p, | ||||||
|                                          const uint8_t *blimit, |                                          const uint8_t *blimit, | ||||||
|                                          const uint8_t *limit, |                                          const uint8_t *limit, | ||||||
|                                          const uint8_t *thresh, int bd) { |                                          const uint8_t *thresh, int bd) { | ||||||
|   | |||||||
| @@ -403,10 +403,11 @@ void vpx_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) { | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch, | static void mb_lpf_horizontal_edge_dual(uint8_t *src, int32_t pitch, | ||||||
|                                     const uint8_t *b_limit_ptr, |                                         const uint8_t *b_limit_ptr, | ||||||
|                                     const uint8_t *limit_ptr, |                                         const uint8_t *limit_ptr, | ||||||
|                                     const uint8_t *thresh_ptr, int32_t count) { |                                         const uint8_t *thresh_ptr, | ||||||
|  |                                         int32_t count) { | ||||||
|   DECLARE_ALIGNED(32, uint8_t, filter48[16 * 8]); |   DECLARE_ALIGNED(32, uint8_t, filter48[16 * 8]); | ||||||
|   uint8_t early_exit = 0; |   uint8_t early_exit = 0; | ||||||
|  |  | ||||||
| @@ -638,19 +639,19 @@ static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch, | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
|   } else { |   } else { | ||||||
|     vpx_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr, |     mb_lpf_horizontal_edge_dual(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, | ||||||
|                                    thresh_ptr, count); |                                 count); | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_8_msa(uint8_t *src, int32_t pitch, | void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch, | ||||||
|                                    const uint8_t *b_limit_ptr, |                                const uint8_t *b_limit_ptr, | ||||||
|                                    const uint8_t *limit_ptr, |                                const uint8_t *limit_ptr, | ||||||
|                                    const uint8_t *thresh_ptr) { |                                const uint8_t *thresh_ptr) { | ||||||
|   mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1); |   mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1); | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_16_msa(uint8_t *src, int32_t pitch, | void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch, | ||||||
|                                     const uint8_t *b_limit_ptr, |                                     const uint8_t *b_limit_ptr, | ||||||
|                                     const uint8_t *limit_ptr, |                                     const uint8_t *limit_ptr, | ||||||
|                                     const uint8_t *thresh_ptr) { |                                     const uint8_t *thresh_ptr) { | ||||||
|   | |||||||
| @@ -717,14 +717,13 @@ static void mb_lpf_horizontal_edge(unsigned char *s, int pitch, | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_8_dspr2(unsigned char *s, int pitch, | void vpx_lpf_horizontal_16_dspr2(unsigned char *s, int pitch, | ||||||
|                                      const uint8_t *blimit, |                                  const uint8_t *blimit, const uint8_t *limit, | ||||||
|                                      const uint8_t *limit, |                                  const uint8_t *thresh) { | ||||||
|                                      const uint8_t *thresh) { |  | ||||||
|   mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1); |   mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1); | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_16_dspr2(unsigned char *s, int pitch, | void vpx_lpf_horizontal_16_dual_dspr2(unsigned char *s, int pitch, | ||||||
|                                       const uint8_t *blimit, |                                       const uint8_t *blimit, | ||||||
|                                       const uint8_t *limit, |                                       const uint8_t *limit, | ||||||
|                                       const uint8_t *thresh) { |                                       const uint8_t *thresh) { | ||||||
|   | |||||||
| @@ -522,11 +522,11 @@ specialize qw/vpx_lpf_vertical_4 sse2 neon dspr2 msa/; | |||||||
| add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; | add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; | ||||||
| specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/; | specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/; | ||||||
|  |  | ||||||
| add_proto qw/void vpx_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; | add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; | ||||||
| specialize qw/vpx_lpf_horizontal_edge_8 sse2 avx2 neon dspr2 msa/; | specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon dspr2 msa/; | ||||||
|  |  | ||||||
| add_proto qw/void vpx_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; | add_proto qw/void vpx_lpf_horizontal_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; | ||||||
| specialize qw/vpx_lpf_horizontal_edge_16 sse2 avx2 neon dspr2 msa/; | specialize qw/vpx_lpf_horizontal_16_dual sse2 avx2 neon dspr2 msa/; | ||||||
|  |  | ||||||
| add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; | add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; | ||||||
| specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/; | specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/; | ||||||
| @@ -559,11 +559,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { | |||||||
|   add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; |   add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; | ||||||
|   specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/; |   specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/; | ||||||
|  |  | ||||||
|   add_proto qw/void vpx_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; |   add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; | ||||||
|   specialize qw/vpx_highbd_lpf_horizontal_edge_8 sse2/; |   specialize qw/vpx_highbd_lpf_horizontal_16 sse2/; | ||||||
|  |  | ||||||
|   add_proto qw/void vpx_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; |   add_proto qw/void vpx_highbd_lpf_horizontal_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; | ||||||
|   specialize qw/vpx_highbd_lpf_horizontal_edge_16 sse2/; |   specialize qw/vpx_highbd_lpf_horizontal_16_dual sse2/; | ||||||
|  |  | ||||||
|   add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; |   add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; | ||||||
|   specialize qw/vpx_highbd_lpf_horizontal_8 sse2/; |   specialize qw/vpx_highbd_lpf_horizontal_8 sse2/; | ||||||
|   | |||||||
| @@ -48,10 +48,10 @@ static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) { | |||||||
|  |  | ||||||
| // TODO(debargha, peter): Break up large functions into smaller ones | // TODO(debargha, peter): Break up large functions into smaller ones | ||||||
| // in this file. | // in this file. | ||||||
| void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p, | void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, | ||||||
|                                            const uint8_t *_blimit, |                                        const uint8_t *_blimit, | ||||||
|                                            const uint8_t *_limit, |                                        const uint8_t *_limit, | ||||||
|                                            const uint8_t *_thresh, int bd) { |                                        const uint8_t *_thresh, int bd) { | ||||||
|   const __m128i zero = _mm_set1_epi16(0); |   const __m128i zero = _mm_set1_epi16(0); | ||||||
|   const __m128i one = _mm_set1_epi16(1); |   const __m128i one = _mm_set1_epi16(1); | ||||||
|   __m128i blimit, limit, thresh; |   __m128i blimit, limit, thresh; | ||||||
| @@ -475,12 +475,12 @@ void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p, | |||||||
|   _mm_store_si128((__m128i *)(s - 0 * p), q0); |   _mm_store_si128((__m128i *)(s - 0 * p), q0); | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p, | void vpx_highbd_lpf_horizontal_16_dual_sse2(uint16_t *s, int p, | ||||||
|                                             const uint8_t *_blimit, |                                             const uint8_t *_blimit, | ||||||
|                                             const uint8_t *_limit, |                                             const uint8_t *_limit, | ||||||
|                                             const uint8_t *_thresh, int bd) { |                                             const uint8_t *_thresh, int bd) { | ||||||
|   vpx_highbd_lpf_horizontal_edge_8_sse2(s, p, _blimit, _limit, _thresh, bd); |   vpx_highbd_lpf_horizontal_16_sse2(s, p, _blimit, _limit, _thresh, bd); | ||||||
|   vpx_highbd_lpf_horizontal_edge_8_sse2(s + 8, p, _blimit, _limit, _thresh, bd); |   vpx_highbd_lpf_horizontal_16_sse2(s + 8, p, _blimit, _limit, _thresh, bd); | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, | void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, | ||||||
| @@ -1108,8 +1108,8 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit, | |||||||
|   highbd_transpose(src, p, dst, 8, 2); |   highbd_transpose(src, p, dst, 8, 2); | ||||||
|  |  | ||||||
|   // Loop filtering |   // Loop filtering | ||||||
|   vpx_highbd_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh, |   vpx_highbd_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh, | ||||||
|                                         bd); |                                     bd); | ||||||
|   src[0] = t_dst; |   src[0] = t_dst; | ||||||
|   src[1] = t_dst + 8 * 8; |   src[1] = t_dst + 8 * 8; | ||||||
|   dst[0] = s - 8; |   dst[0] = s - 8; | ||||||
| @@ -1130,7 +1130,7 @@ void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int p, | |||||||
|   highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); |   highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); | ||||||
|  |  | ||||||
|   //  Loop filtering |   //  Loop filtering | ||||||
|   vpx_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, |   vpx_highbd_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit, | ||||||
|                                          thresh, bd); |                                          thresh, bd); | ||||||
|  |  | ||||||
|   //  Transpose back |   //  Transpose back | ||||||
|   | |||||||
| @@ -13,10 +13,10 @@ | |||||||
| #include "./vpx_dsp_rtcd.h" | #include "./vpx_dsp_rtcd.h" | ||||||
| #include "vpx_ports/mem.h" | #include "vpx_ports/mem.h" | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p, | void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p, | ||||||
|                                     const unsigned char *_blimit, |                                 const unsigned char *_blimit, | ||||||
|                                     const unsigned char *_limit, |                                 const unsigned char *_limit, | ||||||
|                                     const unsigned char *_thresh) { |                                 const unsigned char *_thresh) { | ||||||
|   __m128i mask, hev, flat, flat2; |   __m128i mask, hev, flat, flat2; | ||||||
|   const __m128i zero = _mm_set1_epi16(0); |   const __m128i zero = _mm_set1_epi16(0); | ||||||
|   const __m128i one = _mm_set1_epi8(1); |   const __m128i one = _mm_set1_epi8(1); | ||||||
| @@ -367,7 +367,7 @@ DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = { | |||||||
|   8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128 |   8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128 | ||||||
| }; | }; | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p, | void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p, | ||||||
|                                      const unsigned char *_blimit, |                                      const unsigned char *_blimit, | ||||||
|                                      const unsigned char *_limit, |                                      const unsigned char *_limit, | ||||||
|                                      const unsigned char *_thresh) { |                                      const unsigned char *_thresh) { | ||||||
|   | |||||||
| @@ -229,10 +229,10 @@ void vpx_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */, | |||||||
|   *(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0); |   *(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0); | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p, | void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p, | ||||||
|                                     const unsigned char *_blimit, |                                 const unsigned char *_blimit, | ||||||
|                                     const unsigned char *_limit, |                                 const unsigned char *_limit, | ||||||
|                                     const unsigned char *_thresh) { |                                 const unsigned char *_thresh) { | ||||||
|   const __m128i zero = _mm_set1_epi16(0); |   const __m128i zero = _mm_set1_epi16(0); | ||||||
|   const __m128i one = _mm_set1_epi8(1); |   const __m128i one = _mm_set1_epi8(1); | ||||||
|   const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); |   const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); | ||||||
| @@ -591,7 +591,7 @@ static INLINE __m128i filter16_mask(const __m128i *const flat, | |||||||
|   return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); |   return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); | ||||||
| } | } | ||||||
|  |  | ||||||
| void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p, | void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int p, | ||||||
|                                      const unsigned char *_blimit, |                                      const unsigned char *_blimit, | ||||||
|                                      const unsigned char *_limit, |                                      const unsigned char *_limit, | ||||||
|                                      const unsigned char *_thresh) { |                                      const unsigned char *_thresh) { | ||||||
| @@ -1745,7 +1745,7 @@ void vpx_lpf_vertical_16_sse2(unsigned char *s, int p, | |||||||
|   transpose(src, p, dst, 8, 2); |   transpose(src, p, dst, 8, 2); | ||||||
|  |  | ||||||
|   // Loop filtering |   // Loop filtering | ||||||
|   vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh); |   vpx_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh); | ||||||
|  |  | ||||||
|   src[0] = t_dst; |   src[0] = t_dst; | ||||||
|   src[1] = t_dst + 8 * 8; |   src[1] = t_dst + 8 * 8; | ||||||
| @@ -1766,7 +1766,7 @@ void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p, | |||||||
|   transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); |   transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); | ||||||
|  |  | ||||||
|   // Loop filtering |   // Loop filtering | ||||||
|   vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh); |   vpx_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh); | ||||||
|  |  | ||||||
|   // Transpose back |   // Transpose back | ||||||
|   transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); |   transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Linfeng Zhang
					Linfeng Zhang