From b1e97c6a25d53fb2e62e2fb857fbf146bb19cbd3 Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 11 Feb 2016 20:26:54 -0800 Subject: [PATCH] vpx_lpf_horizontal_4: remove unused count param Change-Id: Iec7d8eda343991f7d7d46931dca17af23c821d11 --- test/lpf_8_test.cc | 13 ++++++++----- vp10/common/loopfilter.c | 16 ++++++++-------- vp9/common/vp9_loopfilter.c | 16 ++++++++-------- vpx_dsp/arm/loopfilter_4_neon.asm | 16 +--------------- vpx_dsp/arm/loopfilter_4_neon.c | 8 ++------ vpx_dsp/loopfilter.c | 8 ++++---- vpx_dsp/mips/loopfilter_4_msa.c | 5 +---- vpx_dsp/mips/loopfilter_filters_dspr2.c | 7 +++---- vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- vpx_dsp/x86/loopfilter_mmx.asm | 12 ++---------- 10 files changed, 38 insertions(+), 65 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 394360e2f..e6fe1e508 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -458,7 +458,8 @@ using std::tr1::make_tuple; INSTANTIATE_TEST_CASE_P( MMX, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_lpf_horizontal_4_mmx, &vpx_lpf_horizontal_4_c, 8, 1), + make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_mmx>, + &wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1))); #endif // HAVE_MMX @@ -609,8 +610,8 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), - make_tuple(&vpx_lpf_horizontal_4_neon, - &vpx_lpf_horizontal_4_c, 8, 1), + make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_neon>, + &wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1))); INSTANTIATE_TEST_CASE_P( @@ -633,7 +634,8 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( DSPR2, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8, 1), + make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_dspr2>, + &wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&vpx_lpf_horizontal_16_dspr2, @@ -666,7 +668,8 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( MSA, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8, 1), + 
make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_msa>, + &wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 6bbf191ac..1f7ce981f 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -535,10 +535,10 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { @@ -546,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { @@ -563,22 +563,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { - vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } else if (mask_4x4_int & 1) { vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } s += 8 * count; diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index d5431c2c2..7cc833e19 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -535,10 +535,10 @@ static void 
filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { @@ -546,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { @@ -563,22 +563,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { - vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } else if (mask_4x4_int & 1) { vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } s += 8 * count; diff --git a/vpx_dsp/arm/loopfilter_4_neon.asm b/vpx_dsp/arm/loopfilter_4_neon.asm index d794f552a..937115898 100644 --- a/vpx_dsp/arm/loopfilter_4_neon.asm +++ b/vpx_dsp/arm/loopfilter_4_neon.asm @@ -16,37 +16,28 @@ ; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. -; TODO(fgalligan): See about removing the count code as this function is only -; called with a count of 1. 
; ; void vpx_lpf_horizontal_4_neon(uint8_t *s, ; int p /* pitch */, ; const uint8_t *blimit, ; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; const uint8_t *thresh) ; ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -; sp+4 int count |vpx_lpf_horizontal_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit - ldr r12, [sp, #8] ; load count ldr r2, [sp, #4] ; load thresh add r1, r1, r1 ; double pitch - cmp r12, #0 - beq end_vpx_lf_h_edge - vld1.8 {d1[]}, [r3] ; duplicate *limit vld1.8 {d2[]}, [r2] ; duplicate *thresh -count_lf_h_loop sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines add r3, r2, r1, lsr #1 ; set to 3 lines down @@ -69,11 +60,6 @@ count_lf_h_loop vst1.u8 {d6}, [r2@64], r1 ; store oq0 vst1.u8 {d7}, [r3@64], r1 ; store oq1 - add r0, r0, #8 - subs r12, r12, #1 - bne count_lf_h_loop - -end_vpx_lf_h_edge pop {pc} ENDP ; |vpx_lpf_horizontal_4_neon| diff --git a/vpx_dsp/arm/loopfilter_4_neon.c b/vpx_dsp/arm/loopfilter_4_neon.c index db9ea6a9d..7f3ee70b9 100644 --- a/vpx_dsp/arm/loopfilter_4_neon.c +++ b/vpx_dsp/arm/loopfilter_4_neon.c @@ -115,22 +115,18 @@ void vpx_lpf_horizontal_4_neon( int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { int i; uint8_t *s, *psrc; uint8x8_t dblimit, dlimit, dthresh; uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8; - if (count == 0) // end_vpx_lf_h_edge - return; - dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); psrc = src - (pitch << 2); - for (i = 0; i < count; i++) { + for (i = 0; i < 1; i++) { s = psrc + i * 8; d3u8 = vld1_u8(s); diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index e8092d912..e545d36ab 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -119,12 +119,12 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1, void vpx_lpf_horizontal_4_c(uint8_t *s, 
int p /* pitch */, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, int count) { + const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. - for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; const int8_t mask = filter_mask(*limit, *blimit, @@ -138,8 +138,8 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, diff --git a/vpx_dsp/mips/loopfilter_4_msa.c b/vpx_dsp/mips/loopfilter_4_msa.c index ebeaddd21..936347031 100644 --- a/vpx_dsp/mips/loopfilter_4_msa.c +++ b/vpx_dsp/mips/loopfilter_4_msa.c @@ -13,14 +13,11 @@ void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { + const uint8_t *thresh_ptr) { uint64_t p1_d, p0_d, q0_d, q1_d; v16u8 mask, hev, flat, thresh, b_limit, limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out; - (void)count; - /* load vector elements */ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); diff --git a/vpx_dsp/mips/loopfilter_filters_dspr2.c b/vpx_dsp/mips/loopfilter_filters_dspr2.c index 8a24372cb..8414b9ed5 100644 --- a/vpx_dsp/mips/loopfilter_filters_dspr2.c +++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c @@ -23,8 +23,7 @@ void vpx_lpf_horizontal_4_dspr2(unsigned char *s, int 
pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { uint8_t i; uint32_t mask; uint32_t hev; @@ -312,8 +311,8 @@ void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 3f63a5f62..36c89db8f 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -559,7 +559,7 @@ add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/; $vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon; -add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/; add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; diff --git a/vpx_dsp/x86/loopfilter_mmx.asm b/vpx_dsp/x86/loopfilter_mmx.asm index dee565ce0..15105e3ed 100644 --- a/vpx_dsp/x86/loopfilter_mmx.asm +++ b/vpx_dsp/x86/loopfilter_mmx.asm @@ -18,14 +18,13 @@ ; int src_pixel_step, ; const char *blimit, ; const char *limit, -; const char *thresh, -; int count +; const char *thresh ;) global sym(vpx_lpf_horizontal_4_mmx) PRIVATE 
sym(vpx_lpf_horizontal_4_mmx): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 + SHADOW_ARGS_TO_STACK 5 GET_GOT rbx push rsi push rdi @@ -39,8 +38,6 @@ sym(vpx_lpf_horizontal_4_mmx): mov rsi, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - movsxd rcx, dword ptr arg(5) ;count -.next8_h: mov rdx, arg(3) ;limit movq mm7, [rdx] mov rdi, rsi ; rdi points to row +1 for indirect addressing @@ -208,11 +205,6 @@ sym(vpx_lpf_horizontal_4_mmx): pxor mm7, [GLOBAL(t80)] ; unoffset movq [rdi], mm7 ; write back - add rsi,8 - neg rax - dec rcx - jnz .next8_h - add rsp, 32 pop rsp ; begin epilog