Merge "Unify loopfilter function names"

2016-09-30 15:58:08 +00:00 · 2016-09-30 15:58:08 +00:00 · 8c744fd978
commit 8c744fd978
parent c435b7fbdd 7f1f35183a
11 changed files with 97 additions and 102 deletions
--- a/test/lpf_test.cc
+++ b/test/lpf_test.cc
@@ -402,10 +402,10 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_highbd_lpf_vertical_4_c, 8),
                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
                                 &vpx_highbd_lpf_horizontal_8_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_8_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_16_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 8),
                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
                                 &vpx_highbd_lpf_vertical_8_c, 8),
                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
@@ -416,10 +416,10 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_highbd_lpf_vertical_4_c, 10),
                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
                                 &vpx_highbd_lpf_horizontal_8_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_8_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_16_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 10),
                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
                                 &vpx_highbd_lpf_vertical_8_c, 10),
                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
@@ -430,10 +430,10 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_highbd_lpf_vertical_4_c, 12),
                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
                                 &vpx_highbd_lpf_horizontal_8_c, 12),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_8_c, 12),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_16_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 12),
                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
                                 &vpx_highbd_lpf_vertical_8_c, 12),
                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
@@ -450,10 +450,9 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vpx_lpf_horizontal_4_sse2, &vpx_lpf_horizontal_4_c, 8),
        make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_8_sse2,
-                   &vpx_lpf_horizontal_edge_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_16_sse2,
-                   &vpx_lpf_horizontal_edge_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_sse2,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
        make_tuple(&vpx_lpf_vertical_4_sse2, &vpx_lpf_vertical_4_c, 8),
        make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8),
        make_tuple(&vpx_lpf_vertical_16_sse2, &vpx_lpf_vertical_16_c, 8),
@@ -465,10 +464,10 @@ INSTANTIATE_TEST_CASE_P(
 #if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
 INSTANTIATE_TEST_CASE_P(
    AVX2, Loop8Test6Param,
-    ::testing::Values(make_tuple(&vpx_lpf_horizontal_edge_8_avx2,
-                                 &vpx_lpf_horizontal_edge_8_c, 8),
-                      make_tuple(&vpx_lpf_horizontal_edge_16_avx2,
-                                 &vpx_lpf_horizontal_edge_16_c, 8)));
+    ::testing::Values(make_tuple(&vpx_lpf_horizontal_16_avx2,
+                                 &vpx_lpf_horizontal_16_c, 8),
+                      make_tuple(&vpx_lpf_horizontal_16_dual_avx2,
+                                 &vpx_lpf_horizontal_16_dual_c, 8)));
 #endif

 #if HAVE_SSE2
@@ -520,10 +519,9 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    NEON, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_edge_8_neon,
-                   &vpx_lpf_horizontal_edge_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_16_neon,
-                   &vpx_lpf_horizontal_edge_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_neon, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_neon,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
        make_tuple(&vpx_lpf_vertical_16_neon, &vpx_lpf_vertical_16_c, 8),
        make_tuple(&vpx_lpf_vertical_16_dual_neon, &vpx_lpf_vertical_16_dual_c,
                   8),
@@ -550,8 +548,9 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8),
        make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_8, &vpx_lpf_horizontal_edge_8, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_16, &vpx_lpf_horizontal_edge_16, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dspr2, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_dspr2,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
        make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8),
        make_tuple(&vpx_lpf_vertical_8_dspr2, &vpx_lpf_vertical_8_c, 8),
        make_tuple(&vpx_lpf_vertical_16_dspr2, &vpx_lpf_vertical_16_c, 8),
@@ -576,10 +575,9 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8),
        make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_8_msa, &vpx_lpf_horizontal_edge_8_c,
-                   8),
-        make_tuple(&vpx_lpf_horizontal_edge_16_msa,
-                   &vpx_lpf_horizontal_edge_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_msa,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
        make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8),
        make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8),
        make_tuple(&vpx_lpf_vertical_16_msa, &vpx_lpf_vertical_16_c, 8)));
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -465,12 +465,11 @@ static void filter_selectively_horiz(

      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
-          vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+          vpx_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim,
                                     lfi->hev_thr);
          count = 2;
        } else {
-          vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
-                                    lfi->hev_thr);
+          vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
@@ -559,12 +558,12 @@ static void highbd_filter_selectively_horiz(

      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
-          vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+          vpx_highbd_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim,
                                            lfi->hev_thr, bd);
          count = 2;
        } else {
-          vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
-                                           lfi->hev_thr, bd);
+          vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+                                       lfi->hev_thr, bd);
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
--- a/vpx_dsp/arm/loopfilter_mb_neon.asm
+++ b/vpx_dsp/arm/loopfilter_mb_neon.asm
@@ -8,8 +8,8 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;

-    EXPORT  |vpx_lpf_horizontal_edge_8_neon|
-    EXPORT  |vpx_lpf_horizontal_edge_16_neon|
+    EXPORT  |vpx_lpf_horizontal_16_neon|
+    EXPORT  |vpx_lpf_horizontal_16_dual_neon|
    EXPORT  |vpx_lpf_vertical_16_neon|
    EXPORT  |vpx_lpf_vertical_16_dual_neon|
    ARM
@@ -119,7 +119,7 @@ h_next

    ENDP        ; |mb_lpf_horizontal_edge|

-; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
+; void vpx_lpf_horizontal_16_neon(uint8_t *s, int pitch,
 ;                                     const uint8_t *blimit,
 ;                                     const uint8_t *limit,
 ;                                     const uint8_t *thresh)
@@ -128,12 +128,12 @@ h_next
 ; r2    const uint8_t *blimit,
 ; r3    const uint8_t *limit,
 ; sp    const uint8_t *thresh
-|vpx_lpf_horizontal_edge_8_neon| PROC
+|vpx_lpf_horizontal_16_neon| PROC
    mov r12, #1
    b mb_lpf_horizontal_edge
-    ENDP        ; |vpx_lpf_horizontal_edge_8_neon|
+    ENDP        ; |vpx_lpf_horizontal_16_neon|

-; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
+; void vpx_lpf_horizontal_16_dual_neon(uint8_t *s, int pitch,
 ;                                      const uint8_t *blimit,
 ;                                      const uint8_t *limit,
 ;                                      const uint8_t *thresh)
@@ -142,10 +142,10 @@ h_next
 ; r2    const uint8_t *blimit,
 ; r3    const uint8_t *limit,
 ; sp    const uint8_t *thresh
-|vpx_lpf_horizontal_edge_16_neon| PROC
+|vpx_lpf_horizontal_16_dual_neon| PROC
    mov r12, #2
    b mb_lpf_horizontal_edge
-    ENDP        ; |vpx_lpf_horizontal_edge_16_neon|
+    ENDP        ; |vpx_lpf_horizontal_16_dual_neon|

 ; void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
 ;                             const uint8_t *limit, const uint8_t *thresh,
--- a/vpx_dsp/arm/loopfilter_mb_neon.c
+++ b/vpx_dsp/arm/loopfilter_mb_neon.c
@@ -975,9 +975,8 @@ FUN_LPF_16_KERNEL(_, 8)        // lpf_16_kernel
 FUN_LPF_16_KERNEL(_dual_, 16)  // lpf_16_dual_kernel
 #undef FUN_LPF_16_KERNEL

-void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int p, const uint8_t *blimit,
-                                    const uint8_t *limit,
-                                    const uint8_t *thresh) {
+void vpx_lpf_horizontal_16_neon(uint8_t *s, int p, const uint8_t *blimit,
+                                const uint8_t *limit, const uint8_t *thresh) {
  uint8x8_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6,
      op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6;
  uint32_t flat_status, flat2_status;
@@ -992,7 +991,7 @@ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int p, const uint8_t *blimit,
             oq5, oq6, flat_status, flat2_status);
 }

-void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_horizontal_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
                                     const uint8_t *limit,
                                     const uint8_t *thresh) {
  uint8x16_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7,
--- a/vpx_dsp/loopfilter.c
+++ b/vpx_dsp/loopfilter.c
@@ -308,12 +308,12 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
  }
 }

-void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit,
-                                 const uint8_t *limit, const uint8_t *thresh) {
+void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
+                             const uint8_t *limit, const uint8_t *thresh) {
  mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
 }

-void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_horizontal_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
                                  const uint8_t *limit, const uint8_t *thresh) {
  mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
 }
@@ -673,14 +673,13 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
  }
 }

-void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p,
-                                        const uint8_t *blimit,
-                                        const uint8_t *limit,
-                                        const uint8_t *thresh, int bd) {
+void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
+                                    const uint8_t *limit, const uint8_t *thresh,
+                                    int bd) {
  highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
 }

-void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_16_dual_c(uint16_t *s, int p,
                                         const uint8_t *blimit,
                                         const uint8_t *limit,
                                         const uint8_t *thresh, int bd) {
--- a/vpx_dsp/mips/loopfilter_16_msa.c
+++ b/vpx_dsp/mips/loopfilter_16_msa.c
@@ -403,10 +403,11 @@ void vpx_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
  }
 }

-void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
-                                    const uint8_t *b_limit_ptr,
-                                    const uint8_t *limit_ptr,
-                                    const uint8_t *thresh_ptr, int32_t count) {
+static void mb_lpf_horizontal_edge_dual(uint8_t *src, int32_t pitch,
+                                        const uint8_t *b_limit_ptr,
+                                        const uint8_t *limit_ptr,
+                                        const uint8_t *thresh_ptr,
+                                        int32_t count) {
  DECLARE_ALIGNED(32, uint8_t, filter48[16 * 8]);
  uint8_t early_exit = 0;

@@ -638,19 +639,19 @@ static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch,
      }
    }
  } else {
-    vpx_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
-                                   thresh_ptr, count);
+    mb_lpf_horizontal_edge_dual(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr,
+                                count);
  }
 }

-void vpx_lpf_horizontal_edge_8_msa(uint8_t *src, int32_t pitch,
-                                   const uint8_t *b_limit_ptr,
-                                   const uint8_t *limit_ptr,
-                                   const uint8_t *thresh_ptr) {
+void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
+                               const uint8_t *b_limit_ptr,
+                               const uint8_t *limit_ptr,
+                               const uint8_t *thresh_ptr) {
  mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1);
 }

-void vpx_lpf_horizontal_edge_16_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
                                    const uint8_t *b_limit_ptr,
                                    const uint8_t *limit_ptr,
                                    const uint8_t *thresh_ptr) {
--- a/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
@@ -717,14 +717,13 @@ static void mb_lpf_horizontal_edge(unsigned char *s, int pitch,
  }
 }

-void vpx_lpf_horizontal_edge_8_dspr2(unsigned char *s, int pitch,
-                                     const uint8_t *blimit,
-                                     const uint8_t *limit,
-                                     const uint8_t *thresh) {
+void vpx_lpf_horizontal_16_dspr2(unsigned char *s, int pitch,
+                                 const uint8_t *blimit, const uint8_t *limit,
+                                 const uint8_t *thresh) {
  mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1);
 }

-void vpx_lpf_horizontal_edge_16_dspr2(unsigned char *s, int pitch,
+void vpx_lpf_horizontal_16_dual_dspr2(unsigned char *s, int pitch,
                                      const uint8_t *blimit,
                                      const uint8_t *limit,
                                      const uint8_t *thresh) {
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -516,11 +516,11 @@ specialize qw/vpx_lpf_vertical_4 sse2 neon dspr2 msa/;
 add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;

-add_proto qw/void vpx_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vpx_lpf_horizontal_edge_8 sse2 avx2 neon dspr2 msa/;
+add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon dspr2 msa/;

-add_proto qw/void vpx_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vpx_lpf_horizontal_edge_16 sse2 avx2 neon dspr2 msa/;
+add_proto qw/void vpx_lpf_horizontal_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_horizontal_16_dual sse2 avx2 neon dspr2 msa/;

 add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
 specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
@@ -553,11 +553,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;

-  add_proto qw/void vpx_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-  specialize qw/vpx_highbd_lpf_horizontal_edge_8 sse2/;
+  add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+  specialize qw/vpx_highbd_lpf_horizontal_16 sse2/;

-  add_proto qw/void vpx_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-  specialize qw/vpx_highbd_lpf_horizontal_edge_16 sse2/;
+  add_proto qw/void vpx_highbd_lpf_horizontal_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+  specialize qw/vpx_highbd_lpf_horizontal_16_dual sse2/;

  add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
--- a/vpx_dsp/x86/highbd_loopfilter_sse2.c
+++ b/vpx_dsp/x86/highbd_loopfilter_sse2.c
@@ -48,10 +48,10 @@ static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) {

 // TODO(debargha, peter): Break up large functions into smaller ones
 // in this file.
-void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
-                                           const uint8_t *_blimit,
-                                           const uint8_t *_limit,
-                                           const uint8_t *_thresh, int bd) {
+void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
+                                       const uint8_t *_blimit,
+                                       const uint8_t *_limit,
+                                       const uint8_t *_thresh, int bd) {
  const __m128i zero = _mm_set1_epi16(0);
  const __m128i one = _mm_set1_epi16(1);
  __m128i blimit, limit, thresh;
@@ -475,12 +475,12 @@ void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
  _mm_store_si128((__m128i *)(s - 0 * p), q0);
 }

-void vpx_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_16_dual_sse2(uint16_t *s, int p,
                                            const uint8_t *_blimit,
                                            const uint8_t *_limit,
                                            const uint8_t *_thresh, int bd) {
-  vpx_highbd_lpf_horizontal_edge_8_sse2(s, p, _blimit, _limit, _thresh, bd);
-  vpx_highbd_lpf_horizontal_edge_8_sse2(s + 8, p, _blimit, _limit, _thresh, bd);
+  vpx_highbd_lpf_horizontal_16_sse2(s, p, _blimit, _limit, _thresh, bd);
+  vpx_highbd_lpf_horizontal_16_sse2(s + 8, p, _blimit, _limit, _thresh, bd);
 }

 void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
@@ -1108,8 +1108,8 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
  highbd_transpose(src, p, dst, 8, 2);

  // Loop filtering
-  vpx_highbd_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh,
-                                        bd);
+  vpx_highbd_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh,
+                                    bd);
  src[0] = t_dst;
  src[1] = t_dst + 8 * 8;
  dst[0] = s - 8;
@@ -1130,7 +1130,7 @@ void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int p,
  highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);

  //  Loop filtering
-  vpx_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit,
+  vpx_highbd_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit,
                                         thresh, bd);

  //  Transpose back
--- a/vpx_dsp/x86/loopfilter_avx2.c
+++ b/vpx_dsp/x86/loopfilter_avx2.c
@@ -13,10 +13,10 @@
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_ports/mem.h"

-void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p,
-                                    const unsigned char *_blimit,
-                                    const unsigned char *_limit,
-                                    const unsigned char *_thresh) {
+void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p,
+                                const unsigned char *_blimit,
+                                const unsigned char *_limit,
+                                const unsigned char *_thresh) {
  __m128i mask, hev, flat, flat2;
  const __m128i zero = _mm_set1_epi16(0);
  const __m128i one = _mm_set1_epi8(1);
@@ -367,7 +367,7 @@ DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = {
  8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128
 };

-void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p,
+void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p,
                                     const unsigned char *_blimit,
                                     const unsigned char *_limit,
                                     const unsigned char *_thresh) {
--- a/vpx_dsp/x86/loopfilter_sse2.c
+++ b/vpx_dsp/x86/loopfilter_sse2.c
@@ -229,10 +229,10 @@ void vpx_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */,
  *(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
 }

-void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p,
-                                    const unsigned char *_blimit,
-                                    const unsigned char *_limit,
-                                    const unsigned char *_thresh) {
+void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
+                                const unsigned char *_blimit,
+                                const unsigned char *_limit,
+                                const unsigned char *_thresh) {
  const __m128i zero = _mm_set1_epi16(0);
  const __m128i one = _mm_set1_epi8(1);
  const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
@@ -591,7 +591,7 @@ static INLINE __m128i filter16_mask(const __m128i *const flat,
  return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result);
 }

-void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p,
+void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int p,
                                     const unsigned char *_blimit,
                                     const unsigned char *_limit,
                                     const unsigned char *_thresh) {
@@ -1745,7 +1745,7 @@ void vpx_lpf_vertical_16_sse2(unsigned char *s, int p,
  transpose(src, p, dst, 8, 2);

  // Loop filtering
-  vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh);
+  vpx_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh);

  src[0] = t_dst;
  src[1] = t_dst + 8 * 8;
@@ -1766,7 +1766,7 @@ void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
  transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);

  // Loop filtering
-  vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh);
+  vpx_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh);

  // Transpose back
  transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);