NEON asm of vpx_lpf_{horizontal,vertical}_8_dual_neon()
Also expose the NEON intrinsics version. BUG=webm:1261, webm:1266. Change-Id: I8c4ae658467dcf66ebf7a75982b2ef712dbb4535
This commit is contained in:
parent
5d881770e5
commit
f9efbad392
@ -531,16 +531,12 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vpx_lpf_vertical_8_neon, &vpx_lpf_vertical_8_c, 8),
|
||||
make_tuple(&vpx_lpf_horizontal_4_neon, &vpx_lpf_horizontal_4_c, 8),
|
||||
make_tuple(&vpx_lpf_vertical_4_neon, &vpx_lpf_vertical_4_c, 8)));
|
||||
INSTANTIATE_TEST_CASE_P(NEON, Loop8Test9Param,
|
||||
::testing::Values(
|
||||
// Using #if inside the macro is unsupported on MSVS but the tests are not
|
||||
// currently built for MSVS with ARM and NEON.
|
||||
#if HAVE_NEON_ASM
|
||||
make_tuple(&vpx_lpf_horizontal_8_dual_neon,
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, Loop8Test9Param,
|
||||
::testing::Values(make_tuple(&vpx_lpf_horizontal_8_dual_neon,
|
||||
&vpx_lpf_horizontal_8_dual_c, 8),
|
||||
make_tuple(&vpx_lpf_vertical_8_dual_neon,
|
||||
&vpx_lpf_vertical_8_dual_c, 8),
|
||||
#endif // HAVE_NEON_ASM
|
||||
make_tuple(&vpx_lpf_horizontal_4_dual_neon,
|
||||
&vpx_lpf_horizontal_4_dual_c, 8),
|
||||
make_tuple(&vpx_lpf_vertical_4_dual_neon,
|
||||
|
@ -9,7 +9,9 @@
|
||||
;
|
||||
|
||||
EXPORT |vpx_lpf_horizontal_8_neon|
|
||||
EXPORT |vpx_lpf_horizontal_8_dual_neon|
|
||||
EXPORT |vpx_lpf_vertical_8_neon|
|
||||
EXPORT |vpx_lpf_vertical_8_dual_neon|
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
@ -64,6 +66,38 @@
|
||||
|
||||
ENDP ; |vpx_lpf_horizontal_8_neon|
|
||||
|
||||
;void vpx_lpf_horizontal_8_dual_neon(uint8_t *s,
|
||||
; int p,
|
||||
; const uint8_t *blimit0,
|
||||
; const uint8_t *limit0,
|
||||
; const uint8_t *thresh0,
|
||||
; const uint8_t *blimit1,
|
||||
; const uint8_t *limit1,
|
||||
; const uint8_t *thresh1)
|
||||
; r0 uint8_t *s,
|
||||
; r1 int p, /* pitch */
|
||||
; r2 const uint8_t *blimit0,
|
||||
; r3 const uint8_t *limit0,
|
||||
; sp const uint8_t *thresh0,
|
||||
; sp + 4 const uint8_t *blimit1,
|
||||
; sp + 8 const uint8_t *limit1,
|
||||
; sp + 12 const uint8_t *thresh1,
|
||||
|vpx_lpf_horizontal_8_dual_neon| PROC
|
||||
push {r0-r1, lr} ; save s, pitch and the return address across the first call
|
||||
ldr lr, [sp, #12] ; 3 words were just pushed, so the caller's first stack arg (thresh0) is at sp + 12
|
||||
push {lr} ; thresh0 - becomes the stack argument of the first call
|
||||
bl vpx_lpf_horizontal_8_neon ; first call: (s, p, blimit0, limit0, thresh0)
|
||||
|
||||
ldr r2, [sp, #20] ; blimit1
|
||||
ldr r3, [sp, #24] ; limit1
|
||||
ldr lr, [sp, #28] ; load thresh1 from the caller's stack arguments
|
||||
str lr, [sp, #16] ; thresh1 - overwrite the caller's thresh0 slot so it is at [sp] after unwinding
|
||||
add sp, #4 ; drop the thresh0 copy pushed for the first call
|
||||
pop {r0-r1, lr} ; restore s, pitch and the return address
|
||||
add r0, #8 ; s + 8
|
||||
b vpx_lpf_horizontal_8_neon ; tail call (no link): the second call returns straight to our caller
|
||||
ENDP ; |vpx_lpf_horizontal_8_dual_neon|
|
||||
|
||||
; void vpx_lpf_vertical_8_neon(uint8_t *s,
|
||||
; int pitch,
|
||||
; const uint8_t *blimit,
|
||||
@ -139,6 +173,38 @@
|
||||
pop {r4-r5, pc}
|
||||
ENDP ; |vpx_lpf_vertical_8_neon|
|
||||
|
||||
;void vpx_lpf_vertical_8_dual_neon(uint8_t *s,
|
||||
; int pitch,
|
||||
; const uint8_t *blimit0,
|
||||
; const uint8_t *limit0,
|
||||
; const uint8_t *thresh0,
|
||||
; const uint8_t *blimit1,
|
||||
; const uint8_t *limit1,
|
||||
; const uint8_t *thresh1)
|
||||
; r0 uint8_t *s,
|
||||
; r1 int pitch
|
||||
; r2 const uint8_t *blimit0,
|
||||
; r3 const uint8_t *limit0,
|
||||
; sp const uint8_t *thresh0,
|
||||
; sp + 4 const uint8_t *blimit1,
|
||||
; sp + 8 const uint8_t *limit1,
|
||||
; sp + 12 const uint8_t *thresh1,
|
||||
|vpx_lpf_vertical_8_dual_neon| PROC
|
||||
push {r0-r1, lr} ; save s, pitch and the return address across the first call
|
||||
ldr lr, [sp, #12] ; 3 words were just pushed, so the caller's first stack arg (thresh0) is at sp + 12
|
||||
push {lr} ; thresh0 - becomes the stack argument of the first call
|
||||
bl vpx_lpf_vertical_8_neon ; first call: (s, pitch, blimit0, limit0, thresh0)
|
||||
|
||||
ldr r2, [sp, #20] ; blimit1
|
||||
ldr r3, [sp, #24] ; limit1
|
||||
ldr lr, [sp, #28] ; load thresh1 from the caller's stack arguments
|
||||
str lr, [sp, #16] ; thresh1 - overwrite the caller's thresh0 slot so it is at [sp] after unwinding
|
||||
add sp, #4 ; drop the thresh0 copy pushed for the first call
|
||||
pop {r0-r1, lr} ; restore s, pitch and the return address
|
||||
add r0, r1, lsl #3 ; s + 8 * pitch
|
||||
b vpx_lpf_vertical_8_neon ; tail call (no link): the second call returns straight to our caller
|
||||
ENDP ; |vpx_lpf_vertical_8_dual_neon|
|
||||
|
||||
; void vpx_mbloop_filter_neon();
|
||||
; This is a helper function for the loopfilters. The individual functions do the
|
||||
; necessary load, transpose (if necessary) and store. The function does not use
|
||||
|
@ -311,6 +311,14 @@ void vpx_lpf_horizontal_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
|
||||
return;
|
||||
}
|
||||
|
||||
void vpx_lpf_horizontal_8_dual_neon(
|
||||
uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
|
||||
const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
|
||||
const uint8_t *limit1, const uint8_t *thresh1) {
|
||||
vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0);  // first call: at s with the "0" parameter set
|
||||
vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1);  // second call: 8 bytes past s with the "1" parameter set
|
||||
}
|
||||
|
||||
void vpx_lpf_vertical_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh) {
|
||||
int i;
|
||||
@ -427,3 +435,11 @@ void vpx_lpf_vertical_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit0,
|
||||
const uint8_t *limit0, const uint8_t *thresh0,
|
||||
const uint8_t *blimit1, const uint8_t *limit1,
|
||||
const uint8_t *thresh1) {
|
||||
vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0);  // first call: at s with the "0" parameter set
|
||||
vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1);  // second call: 8 * p bytes past s with the "1" parameter set
|
||||
}
|
||||
|
@ -21,21 +21,3 @@ void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, const uint8_t *blimit0,
|
||||
vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0);
|
||||
vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1);
|
||||
}
|
||||
|
||||
#if HAVE_NEON_ASM
|
||||
void vpx_lpf_horizontal_8_dual_neon(
|
||||
uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
|
||||
const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
|
||||
const uint8_t *limit1, const uint8_t *thresh1) {
|
||||
vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0);  // first call: at s with the "0" parameter set
|
||||
vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1);  // second call: 8 bytes past s with the "1" parameter set
|
||||
}
|
||||
|
||||
void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit0,
|
||||
const uint8_t *limit0, const uint8_t *thresh0,
|
||||
const uint8_t *blimit1, const uint8_t *limit1,
|
||||
const uint8_t *thresh1) {
|
||||
vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0);  // first call: at s with the "0" parameter set
|
||||
vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1);  // second call: 8 * p bytes past s with the "1" parameter set
|
||||
}
|
||||
#endif // HAVE_NEON_ASM
|
||||
|
@ -514,8 +514,7 @@ add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *bl
|
||||
specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||
specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
|
||||
$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
|
||||
specialize qw/vpx_lpf_vertical_8_dual sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||
specialize qw/vpx_lpf_vertical_4 sse2 neon dspr2 msa/;
|
||||
@ -533,8 +532,7 @@ add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *
|
||||
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
|
||||
$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
|
||||
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon dspr2 msa/;
|
||||
|
||||
add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||
specialize qw/vpx_lpf_horizontal_4 sse2 neon dspr2 msa/;
|
||||
|
Loading…
x
Reference in New Issue
Block a user