Properly save neon registers.
Replace the current code, which corrupts the stack, with a duplicate of the vp8 code that saves and restores the NEON registers. Change-Id: Ibb0220b9aa985d10533befa0a455ebce57a2891a
This commit is contained in:
parent
db60c02c9e
commit
b1b4ba1bdd
@ -29,17 +29,19 @@ extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
|
||||
int16_t skip_adding,
|
||||
uint8_t *dest,
|
||||
int dest_stride);
|
||||
extern void save_neon_registers();
|
||||
extern void restore_neon_registers();
|
||||
|
||||
/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
|
||||
extern void vp9_push_neon(int64_t *store);
|
||||
extern void vp9_pop_neon(int64_t *store);
|
||||
|
||||
void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
int64_t store_reg[8];
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
int16_t row_idct_output[16*16] = {0};
|
||||
|
||||
// save d8-d15 register values.
|
||||
save_neon_registers();
|
||||
vp9_push_neon(store_reg);
|
||||
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
@ -102,18 +104,19 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
dest_stride);
|
||||
|
||||
// restore d8-d15 register values.
|
||||
restore_neon_registers();
|
||||
vp9_pop_neon(store_reg);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void vp9_short_idct16x16_10_add_neon(int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
int64_t store_reg[8];
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
int16_t row_idct_output[16*16] = {0};
|
||||
|
||||
// save d8-d15 register values.
|
||||
save_neon_registers();
|
||||
vp9_push_neon(store_reg);
|
||||
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
@ -163,7 +166,7 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
|
||||
dest_stride);
|
||||
|
||||
// restore d8-d15 register values.
|
||||
restore_neon_registers();
|
||||
vp9_pop_neon(store_reg);
|
||||
|
||||
return;
|
||||
}
|
||||
|
36
vp9/common/arm/neon/vp9_save_reg_neon.asm
Normal file
36
vp9/common/arm/neon/vp9_save_reg_neon.asm
Normal file
@ -0,0 +1,36 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_push_neon|
|
||||
EXPORT |vp9_pop_neon|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; void vp9_push_neon(int64_t *store)
; Copies d8-d15 into the caller-supplied buffer addressed by r0.
; Eight 64-bit registers are written, so the buffer must hold at least
; 64 bytes. r0 is advanced past the stored data by the writeback form.
; NOTE(review): assumes r0 carries the `store` argument per the ARM
; procedure call standard -- confirm against the build's calling convention.
|vp9_push_neon| PROC
|
||||
vst1.i64 {d8, d9, d10, d11}, [r0]!      ; store d8-d11, then r0 += 32
|
||||
vst1.i64 {d12, d13, d14, d15}, [r0]!    ; store d12-d15, then r0 += 32
|
||||
bx lr                                   ; return to caller
|
||||
|
||||
ENDP
|
||||
|
||||
; void vp9_pop_neon(int64_t *store)
; Reloads d8-d15 from the buffer addressed by r0, reading 64 bytes in the
; same register order that vp9_push_neon writes them. r0 is advanced past
; the loaded data by the writeback form.
; NOTE(review): assumes r0 carries the `store` argument per the ARM
; procedure call standard -- confirm against the build's calling convention.
|vp9_pop_neon| PROC
|
||||
vld1.i64 {d8, d9, d10, d11}, [r0]!      ; load d8-d11, then r0 += 32
|
||||
vld1.i64 {d12, d13, d14, d15}, [r0]!    ; load d12-d15, then r0 += 32
|
||||
bx lr                                   ; return to caller
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
||||
|
@ -12,8 +12,6 @@
|
||||
EXPORT |vp9_short_idct16x16_add_neon_pass2|
|
||||
EXPORT |vp9_short_idct16x16_10_add_neon_pass1|
|
||||
EXPORT |vp9_short_idct16x16_10_add_neon_pass2|
|
||||
EXPORT |save_neon_registers|
|
||||
EXPORT |restore_neon_registers|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
@ -1178,14 +1176,4 @@ end_idct10_16x16_pass2
|
||||
pop {r3-r9}
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct16x16_10_add_neon_pass2|
|
||||
;void |save_neon_registers|()
|
||||
|save_neon_registers| PROC
|
||||
vpush {d8-d15}
|
||||
bx lr
|
||||
ENDP ; |save_registers|
|
||||
;void |restore_neon_registers|()
|
||||
|restore_neon_registers| PROC
|
||||
vpop {d8-d15}
|
||||
bx lr
|
||||
ENDP ; |restore_registers|
|
||||
END
|
||||
|
@ -108,5 +108,6 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht8x8_add_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM)
|
||||
|
||||
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))
|
||||
|
Loading…
Reference in New Issue
Block a user