VP9 common for ARMv8 by using NEON intrinsics 14
Add vp9_idct16x16_add_neon.c
- vp9_idct16x16_256_add_neon_pass1
- vp9_idct16x16_256_add_neon_pass2
- vp9_idct16x16_10_add_neon_pass1
- vp9_idct16x16_10_add_neon_pass2

Change-Id: I54d25b54a36f4371760f54e4036693aaea40a5de
Signed-off-by: James Yu <james.yu@linaro.org>
This commit is contained in:
parent
ce76aeb00d
commit
3cfed4bf76
1330
vp9/common/arm/neon/vp9_idct16x16_add_neon.c
Normal file
1330
vp9/common/arm/neon/vp9_idct16x16_add_neon.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -30,18 +30,24 @@ void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,
|
||||
uint8_t *dest,
|
||||
int dest_stride);
|
||||
|
||||
#if HAVE_NEON_ASM
|
||||
/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
|
||||
extern void vp9_push_neon(int64_t *store);
|
||||
extern void vp9_pop_neon(int64_t *store);
|
||||
#endif // HAVE_NEON_ASM
|
||||
|
||||
void vp9_idct16x16_256_add_neon(const int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
#if HAVE_NEON_ASM
|
||||
int64_t store_reg[8];
|
||||
#endif
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
int16_t row_idct_output[16*16] = {0};
|
||||
|
||||
#if HAVE_NEON_ASM
|
||||
// save d8-d15 register values.
|
||||
vp9_push_neon(store_reg);
|
||||
#endif
|
||||
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
@@ -103,20 +109,26 @@ void vp9_idct16x16_256_add_neon(const int16_t *input,
|
||||
dest+8,
|
||||
dest_stride);
|
||||
|
||||
#if HAVE_NEON_ASM
|
||||
// restore d8-d15 register values.
|
||||
vp9_pop_neon(store_reg);
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void vp9_idct16x16_10_add_neon(const int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
#if HAVE_NEON_ASM
|
||||
int64_t store_reg[8];
|
||||
#endif
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
int16_t row_idct_output[16*16] = {0};
|
||||
|
||||
#if HAVE_NEON_ASM
|
||||
// save d8-d15 register values.
|
||||
vp9_push_neon(store_reg);
|
||||
#endif
|
||||
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
@@ -165,8 +177,10 @@ void vp9_idct16x16_10_add_neon(const int16_t *input,
|
||||
dest+8,
|
||||
dest_stride);
|
||||
|
||||
#if HAVE_NEON_ASM
|
||||
// restore d8-d15 register values.
|
||||
vp9_pop_neon(store_reg);
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
@@ -440,12 +440,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
|
||||
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
|
||||
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon dspr2/;
|
||||
|
||||
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
|
||||
$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
|
||||
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon dspr2/;
|
||||
|
||||
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;
|
||||
|
@@ -131,10 +131,8 @@ ifeq ($(ARCH_X86_64), yes)
|
||||
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3_x86_64.asm
|
||||
endif
|
||||
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_neon.c
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_add_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct32x32_add_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_iht4x4_add_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_iht8x8_add_neon$(ASM)
|
||||
@@ -151,6 +149,8 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_copy_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_add_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct32x32_1_add_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_1_add_neon_asm$(ASM)
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon_asm$(ASM)
|
||||
@@ -166,6 +166,8 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_copy_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_add_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct32x32_1_add_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_1_add_neon.c
|
||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon.c
|
||||
|
Loading…
x
Reference in New Issue
Block a user