NEON asm of vpx_lpf_{horizontal,vertical}_8_dual_neon()
Also expose the NEON intrinsics version. BUG=webm:1261, webm:1266. Change-Id: I8c4ae658467dcf66ebf7a75982b2ef712dbb4535
This commit is contained in:
parent
5d881770e5
commit
f9efbad392
@ -531,20 +531,16 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
make_tuple(&vpx_lpf_vertical_8_neon, &vpx_lpf_vertical_8_c, 8),
|
make_tuple(&vpx_lpf_vertical_8_neon, &vpx_lpf_vertical_8_c, 8),
|
||||||
make_tuple(&vpx_lpf_horizontal_4_neon, &vpx_lpf_horizontal_4_c, 8),
|
make_tuple(&vpx_lpf_horizontal_4_neon, &vpx_lpf_horizontal_4_c, 8),
|
||||||
make_tuple(&vpx_lpf_vertical_4_neon, &vpx_lpf_vertical_4_c, 8)));
|
make_tuple(&vpx_lpf_vertical_4_neon, &vpx_lpf_vertical_4_c, 8)));
|
||||||
INSTANTIATE_TEST_CASE_P(NEON, Loop8Test9Param,
|
INSTANTIATE_TEST_CASE_P(
|
||||||
::testing::Values(
|
NEON, Loop8Test9Param,
|
||||||
// Using #if inside the macro is unsupported on MSVS but the tests are not
|
::testing::Values(make_tuple(&vpx_lpf_horizontal_8_dual_neon,
|
||||||
// currently built for MSVS with ARM and NEON.
|
&vpx_lpf_horizontal_8_dual_c, 8),
|
||||||
#if HAVE_NEON_ASM
|
make_tuple(&vpx_lpf_vertical_8_dual_neon,
|
||||||
make_tuple(&vpx_lpf_horizontal_8_dual_neon,
|
&vpx_lpf_vertical_8_dual_c, 8),
|
||||||
&vpx_lpf_horizontal_8_dual_c, 8),
|
make_tuple(&vpx_lpf_horizontal_4_dual_neon,
|
||||||
make_tuple(&vpx_lpf_vertical_8_dual_neon,
|
&vpx_lpf_horizontal_4_dual_c, 8),
|
||||||
&vpx_lpf_vertical_8_dual_c, 8),
|
make_tuple(&vpx_lpf_vertical_4_dual_neon,
|
||||||
#endif // HAVE_NEON_ASM
|
&vpx_lpf_vertical_4_dual_c, 8)));
|
||||||
make_tuple(&vpx_lpf_horizontal_4_dual_neon,
|
|
||||||
&vpx_lpf_horizontal_4_dual_c, 8),
|
|
||||||
make_tuple(&vpx_lpf_vertical_4_dual_neon,
|
|
||||||
&vpx_lpf_vertical_4_dual_c, 8)));
|
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
#endif // HAVE_NEON
|
#endif // HAVE_NEON
|
||||||
|
|
||||||
|
@ -9,7 +9,9 @@
|
|||||||
;
|
;
|
||||||
|
|
||||||
EXPORT |vpx_lpf_horizontal_8_neon|
|
EXPORT |vpx_lpf_horizontal_8_neon|
|
||||||
|
EXPORT |vpx_lpf_horizontal_8_dual_neon|
|
||||||
EXPORT |vpx_lpf_vertical_8_neon|
|
EXPORT |vpx_lpf_vertical_8_neon|
|
||||||
|
EXPORT |vpx_lpf_vertical_8_dual_neon|
|
||||||
ARM
|
ARM
|
||||||
|
|
||||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||||
@ -64,6 +66,38 @@
|
|||||||
|
|
||||||
ENDP ; |vpx_lpf_horizontal_8_neon|
|
ENDP ; |vpx_lpf_horizontal_8_neon|
|
||||||
|
|
||||||
|
;void vpx_lpf_horizontal_8_dual_neon(uint8_t *s,
|
||||||
|
; int p,
|
||||||
|
; const uint8_t *blimit0,
|
||||||
|
; const uint8_t *limit0,
|
||||||
|
; const uint8_t *thresh0,
|
||||||
|
; const uint8_t *blimit1,
|
||||||
|
; const uint8_t *limit1,
|
||||||
|
; const uint8_t *thresh1)
|
||||||
|
; r0 uint8_t *s,
|
||||||
|
; r1 int p, /* pitch */
|
||||||
|
; r2 const uint8_t *blimit0,
|
||||||
|
; r3 const uint8_t *limit0,
|
||||||
|
; sp const uint8_t *thresh0,
|
||||||
|
; sp + 4 const uint8_t *blimit1,
|
||||||
|
; sp + 8 const uint8_t *limit1,
|
||||||
|
; sp + 12 const uint8_t *thresh1,
|
||||||
|
|vpx_lpf_horizontal_8_dual_neon| PROC
|
||||||
|
push {r0-r1, lr}
|
||||||
|
ldr lr, [sp, #12]
|
||||||
|
push {lr} ; thresh0
|
||||||
|
bl vpx_lpf_horizontal_8_neon
|
||||||
|
|
||||||
|
ldr r2, [sp, #20] ; blimit1
|
||||||
|
ldr r3, [sp, #24] ; limit1
|
||||||
|
ldr lr, [sp, #28]
|
||||||
|
str lr, [sp, #16] ; thresh1
|
||||||
|
add sp, #4
|
||||||
|
pop {r0-r1, lr}
|
||||||
|
add r0, #8 ; s + 8
|
||||||
|
b vpx_lpf_horizontal_8_neon
|
||||||
|
ENDP ; |vpx_lpf_horizontal_8_dual_neon|
|
||||||
|
|
||||||
; void vpx_lpf_vertical_8_neon(uint8_t *s,
|
; void vpx_lpf_vertical_8_neon(uint8_t *s,
|
||||||
; int pitch,
|
; int pitch,
|
||||||
; const uint8_t *blimit,
|
; const uint8_t *blimit,
|
||||||
@ -139,6 +173,38 @@
|
|||||||
pop {r4-r5, pc}
|
pop {r4-r5, pc}
|
||||||
ENDP ; |vpx_lpf_vertical_8_neon|
|
ENDP ; |vpx_lpf_vertical_8_neon|
|
||||||
|
|
||||||
|
;void vpx_lpf_vertical_8_dual_neon(uint8_t *s,
|
||||||
|
; int pitch,
|
||||||
|
; const uint8_t *blimit0,
|
||||||
|
; const uint8_t *limit0,
|
||||||
|
; const uint8_t *thresh0,
|
||||||
|
; const uint8_t *blimit1,
|
||||||
|
; const uint8_t *limit1,
|
||||||
|
; const uint8_t *thresh1)
|
||||||
|
; r0 uint8_t *s,
|
||||||
|
; r1 int pitch
|
||||||
|
; r2 const uint8_t *blimit0,
|
||||||
|
; r3 const uint8_t *limit0,
|
||||||
|
; sp const uint8_t *thresh0,
|
||||||
|
; sp + 4 const uint8_t *blimit1,
|
||||||
|
; sp + 8 const uint8_t *limit1,
|
||||||
|
; sp + 12 const uint8_t *thresh1,
|
||||||
|
|vpx_lpf_vertical_8_dual_neon| PROC
|
||||||
|
push {r0-r1, lr}
|
||||||
|
ldr lr, [sp, #12]
|
||||||
|
push {lr} ; thresh0
|
||||||
|
bl vpx_lpf_vertical_8_neon
|
||||||
|
|
||||||
|
ldr r2, [sp, #20] ; blimit1
|
||||||
|
ldr r3, [sp, #24] ; limit1
|
||||||
|
ldr lr, [sp, #28]
|
||||||
|
str lr, [sp, #16] ; thresh1
|
||||||
|
add sp, #4
|
||||||
|
pop {r0-r1, lr}
|
||||||
|
add r0, r1, lsl #3 ; s + 8 * pitch
|
||||||
|
b vpx_lpf_vertical_8_neon
|
||||||
|
ENDP ; |vpx_lpf_vertical_8_dual_neon|
|
||||||
|
|
||||||
; void vpx_mbloop_filter_neon();
|
; void vpx_mbloop_filter_neon();
|
||||||
; This is a helper function for the loopfilters. The individual functions do the
|
; This is a helper function for the loopfilters. The individual functions do the
|
||||||
; necessary load, transpose (if necessary) and store. The function does not use
|
; necessary load, transpose (if necessary) and store. The function does not use
|
||||||
|
@ -311,6 +311,14 @@ void vpx_lpf_horizontal_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void vpx_lpf_horizontal_8_dual_neon(
|
||||||
|
uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
|
||||||
|
const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
|
||||||
|
const uint8_t *limit1, const uint8_t *thresh1) {
|
||||||
|
vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0);
|
||||||
|
vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1);
|
||||||
|
}
|
||||||
|
|
||||||
void vpx_lpf_vertical_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
|
void vpx_lpf_vertical_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh) {
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
int i;
|
int i;
|
||||||
@ -427,3 +435,11 @@ void vpx_lpf_vertical_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
|
|||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit0,
|
||||||
|
const uint8_t *limit0, const uint8_t *thresh0,
|
||||||
|
const uint8_t *blimit1, const uint8_t *limit1,
|
||||||
|
const uint8_t *thresh1) {
|
||||||
|
vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0);
|
||||||
|
vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1);
|
||||||
|
}
|
||||||
|
@ -21,21 +21,3 @@ void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, const uint8_t *blimit0,
|
|||||||
vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0);
|
vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0);
|
||||||
vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1);
|
vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_NEON_ASM
|
|
||||||
void vpx_lpf_horizontal_8_dual_neon(
|
|
||||||
uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
|
|
||||||
const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
|
|
||||||
const uint8_t *limit1, const uint8_t *thresh1) {
|
|
||||||
vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0);
|
|
||||||
vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1);
|
|
||||||
}
|
|
||||||
|
|
||||||
void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit0,
|
|
||||||
const uint8_t *limit0, const uint8_t *thresh0,
|
|
||||||
const uint8_t *blimit1, const uint8_t *limit1,
|
|
||||||
const uint8_t *thresh1) {
|
|
||||||
vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0);
|
|
||||||
vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1);
|
|
||||||
}
|
|
||||||
#endif // HAVE_NEON_ASM
|
|
||||||
|
@ -514,8 +514,7 @@ add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *bl
|
|||||||
specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/;
|
specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||||
specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
|
specialize qw/vpx_lpf_vertical_8_dual sse2 neon dspr2 msa/;
|
||||||
$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
|
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
specialize qw/vpx_lpf_vertical_4 sse2 neon dspr2 msa/;
|
specialize qw/vpx_lpf_vertical_4 sse2 neon dspr2 msa/;
|
||||||
@ -533,8 +532,7 @@ add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *
|
|||||||
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||||
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon dspr2 msa/;
|
||||||
$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
|
|
||||||
|
|
||||||
add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||||
specialize qw/vpx_lpf_horizontal_4 sse2 neon dspr2 msa/;
|
specialize qw/vpx_lpf_horizontal_4 sse2 neon dspr2 msa/;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user