Properly save neon registers.

Replace the current code, which corrupts the stack, with a
duplicate of the vp8 code that saves and restores the neon
registers.

Change-Id: Ibb0220b9aa985d10533befa0a455ebce57a2891a
This commit is contained in:
Christian Duvivier 2013-09-26 16:01:37 -07:00
parent db60c02c9e
commit b1b4ba1bdd
4 changed files with 46 additions and 18 deletions

View File

@ -29,17 +29,19 @@ extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
extern void save_neon_registers();
extern void restore_neon_registers();
/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
extern void vp9_push_neon(int64_t *store);
extern void vp9_pop_neon(int64_t *store);
void vp9_short_idct16x16_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
save_neon_registers();
vp9_push_neon(store_reg);
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
@ -102,18 +104,19 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
dest_stride);
// restore d8-d15 register values.
restore_neon_registers();
vp9_pop_neon(store_reg);
return;
}
void vp9_short_idct16x16_10_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
save_neon_registers();
vp9_push_neon(store_reg);
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
@ -163,7 +166,7 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
dest_stride);
// restore d8-d15 register values.
restore_neon_registers();
vp9_pop_neon(store_reg);
return;
}

View File

@ -0,0 +1,36 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_push_neon|
EXPORT |vp9_pop_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; void vp9_push_neon(int64_t *store)
; Stores NEON registers d8-d15 (8 x 64 bits = 64 bytes) to the memory
; addressed by r0, which is expected to hold the `store` argument.
; Each store uses post-increment writeback, so r0 has advanced by 64 bytes
; on return; sp is not touched.
|vp9_push_neon| PROC
; d8-d11 go to the first 32 bytes, then r0 moves past them.
vst1.i64 {d8, d9, d10, d11}, [r0]!
; d12-d15 go to the following 32 bytes.
vst1.i64 {d12, d13, d14, d15}, [r0]!
bx lr
ENDP
; void vp9_pop_neon(int64_t *store)
; Loads NEON registers d8-d15 (64 bytes) from the memory addressed by r0,
; which is expected to hold the `store` argument. The register order matches
; the layout written by vp9_push_neon.
; Each load uses post-increment writeback, so r0 has advanced by 64 bytes
; on return; sp is not touched.
|vp9_pop_neon| PROC
; d8-d11 come from the first 32 bytes, then r0 moves past them.
vld1.i64 {d8, d9, d10, d11}, [r0]!
; d12-d15 come from the following 32 bytes.
vld1.i64 {d12, d13, d14, d15}, [r0]!
bx lr
ENDP
END

View File

@ -12,8 +12,6 @@
EXPORT |vp9_short_idct16x16_add_neon_pass2|
EXPORT |vp9_short_idct16x16_10_add_neon_pass1|
EXPORT |vp9_short_idct16x16_10_add_neon_pass2|
EXPORT |save_neon_registers|
EXPORT |restore_neon_registers|
ARM
REQUIRE8
PRESERVE8
@ -1178,14 +1176,4 @@ end_idct10_16x16_pass2
pop {r3-r9}
bx lr
ENDP ; |vp9_short_idct16x16_10_add_neon_pass2|
;void |save_neon_registers|()
; Pushes d8-d15 (64 bytes) onto the stack and returns without popping them,
; so sp is 64 bytes lower when control returns to the caller than it was on
; entry to this routine.
|save_neon_registers| PROC
vpush {d8-d15}
bx lr
ENDP ; |save_neon_registers|
;void |restore_neon_registers|()
; Pops 64 bytes from the current top of the stack into d8-d15 and returns,
; leaving sp 64 bytes higher than it was on entry to this routine.
; NOTE(review): this reloads the values pushed by save_neon_registers only if
; sp still points at them when this is called; nothing here checks that.
|restore_neon_registers| PROC
vpop {d8-d15}
bx lr
ENDP ; |restore_neon_registers|
END

View File

@ -108,5 +108,6 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht8x8_add_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM)
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))