VP9 common for ARMv8 by using NEON intrinsics 14

Add vp9_idct16x16_add_neon.c
- vp9_idct16x16_256_add_neon_pass1
- vp9_idct16x16_256_add_neon_pass2
- vp9_idct16x16_10_add_neon_pass1
- vp9_idct16x16_10_add_neon_pass2

Change-Id: I54d25b54a36f4371760f54e4036693aaea40a5de
Signed-off-by: James Yu <james.yu@linaro.org>
This commit is contained in:
James Yu 2014-02-08 01:52:15 +08:00 committed by Johann
parent ce76aeb00d
commit 3cfed4bf76
5 changed files with 1350 additions and 6 deletions

File diff suppressed because it is too large Load Diff

View File

@ -30,18 +30,24 @@ void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,
uint8_t *dest,
int dest_stride);
#if HAVE_NEON_ASM
/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
extern void vp9_push_neon(int64_t *store);
extern void vp9_pop_neon(int64_t *store);
#endif // HAVE_NEON_ASM
void vp9_idct16x16_256_add_neon(const int16_t *input,
uint8_t *dest, int dest_stride) {
#if HAVE_NEON_ASM
int64_t store_reg[8];
#endif
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
#if HAVE_NEON_ASM
// save d8-d15 register values.
vp9_push_neon(store_reg);
#endif
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
@ -103,20 +109,26 @@ void vp9_idct16x16_256_add_neon(const int16_t *input,
dest+8,
dest_stride);
#if HAVE_NEON_ASM
// restore d8-d15 register values.
vp9_pop_neon(store_reg);
#endif
return;
}
void vp9_idct16x16_10_add_neon(const int16_t *input,
uint8_t *dest, int dest_stride) {
#if HAVE_NEON_ASM
int64_t store_reg[8];
#endif
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
#if HAVE_NEON_ASM
// save d8-d15 register values.
vp9_push_neon(store_reg);
#endif
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
@ -165,8 +177,10 @@ void vp9_idct16x16_10_add_neon(const int16_t *input,
dest+8,
dest_stride);
#if HAVE_NEON_ASM
// restore d8-d15 register values.
vp9_pop_neon(store_reg);
#endif
return;
}

View File

@ -440,12 +440,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/;
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon dspr2/;
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon dspr2/;
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;

View File

@ -131,10 +131,8 @@ ifeq ($(ARCH_X86_64), yes)
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3_x86_64.asm
endif
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon_asm$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct32x32_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_iht4x4_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_iht8x8_add_neon$(ASM)
@ -151,6 +149,8 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_neon_asm$(ASM)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_copy_neon_asm$(ASM)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon_asm$(ASM)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_add_neon_asm$(ASM)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct32x32_1_add_neon_asm$(ASM)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_1_add_neon_asm$(ASM)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon_asm$(ASM)
@ -166,6 +166,8 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_copy_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_add_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct32x32_1_add_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_1_add_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon.c