Merge "Add back vp9_short_idct32x32_1_add_neon which is deleted in cleanup I63df79a13cf62aa2c9360a7a26933c100f9ebda3."
This commit is contained in:
commit
a6462990e6
144
vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm
Normal file
144
vp9/common/arm/neon/vp9_short_idct32x32_1_add_neon.asm
Normal file
@ -0,0 +1,144 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
; Assembler set-up for this translation unit.
    EXPORT  |vp9_idct32x32_1_add_neon|         ; make the routine visible to the linker
    ARM                                        ; assemble as ARM (not Thumb) code
    REQUIRE8                                   ; declare: requires 8-byte stack alignment
    PRESERVE8                                  ; declare: preserves 8-byte stack alignment

    ; read-only code section, aligned to 2^2 = 4 bytes
    AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;TODO(hkuang): put the following macros in a separate
|
||||
;file so other idct functions can also use them.
|
||||
; LD_16x8: load eight 16-byte rows into q8..q15 (one row per register).
;   $ptr   register holding the row address; post-incremented by $step
;          after every row, so it ends up 8 rows further on
;   $step  register holding the byte distance between rows
    MACRO
    LD_16x8          $ptr, $step
    vld1.8           {q8}, [$ptr], $step       ; row 0
    vld1.8           {q9}, [$ptr], $step       ; row 1
    vld1.8           {q10}, [$ptr], $step      ; row 2
    vld1.8           {q11}, [$ptr], $step      ; row 3
    vld1.8           {q12}, [$ptr], $step      ; row 4
    vld1.8           {q13}, [$ptr], $step      ; row 5
    vld1.8           {q14}, [$ptr], $step      ; row 6
    vld1.8           {q15}, [$ptr], $step      ; row 7
    MEND
|
||||
|
||||
; ADD_DIFF_16x8: q8..q15 += $delta, byte lane by byte lane, using unsigned
; saturating addition (results stop at 255 instead of wrapping).
;   $delta  q register holding the per-byte amount to add
    MACRO
    ADD_DIFF_16x8    $delta
    vqadd.u8         q8, q8, $delta
    vqadd.u8         q9, q9, $delta
    vqadd.u8         q10, q10, $delta
    vqadd.u8         q11, q11, $delta
    vqadd.u8         q12, q12, $delta
    vqadd.u8         q13, q13, $delta
    vqadd.u8         q14, q14, $delta
    vqadd.u8         q15, q15, $delta
    MEND
|
||||
|
||||
; SUB_DIFF_16x8: q8..q15 -= $delta, byte lane by byte lane, using unsigned
; saturating subtraction (results stop at 0 instead of wrapping).
;   $delta  q register holding the per-byte amount to subtract
    MACRO
    SUB_DIFF_16x8    $delta
    vqsub.u8         q8, q8, $delta
    vqsub.u8         q9, q9, $delta
    vqsub.u8         q10, q10, $delta
    vqsub.u8         q11, q11, $delta
    vqsub.u8         q12, q12, $delta
    vqsub.u8         q13, q13, $delta
    vqsub.u8         q14, q14, $delta
    vqsub.u8         q15, q15, $delta
    MEND
|
||||
|
||||
; ST_16x8: store q8..q15 as eight 16-byte rows (one register per row).
;   $ptr   register holding the row address; post-incremented by $step
;          after every row, so it ends up 8 rows further on
;   $step  register holding the byte distance between rows
    MACRO
    ST_16x8          $ptr, $step
    vst1.8           {q8}, [$ptr], $step       ; row 0
    vst1.8           {q9}, [$ptr], $step       ; row 1
    vst1.8           {q10}, [$ptr], $step      ; row 2
    vst1.8           {q11}, [$ptr], $step      ; row 3
    vst1.8           {q12}, [$ptr], $step      ; row 4
    vst1.8           {q13}, [$ptr], $step      ; row 5
    vst1.8           {q14}, [$ptr], $step      ; row 6
    vst1.8           {q15}, [$ptr], $step      ; row 7
    MEND
|
||||
|
||||
;void vp9_idct32x32_1_add_neon(const int16_t *input, uint8_t *dest,
;                              int dest_stride)
;
; Derives one value a1 from input[0] and applies it, with unsigned 8-bit
; saturation, to every byte of the 32x32 region that starts at dest.
; No other element of input is read.
;
; r0  const int16_t *input  (then the scaled value, then the loop counter)
; r1  uint8_t *dest         (read pointer inside the loops)
; r2  int dest_stride
; r3  dest + 16             (first byte of the right-hand 16 columns)
; r12 cospi_16_64, later the write pointer
; q0  |a1| clamped to 0..255 and replicated into all 16 byte lanes
; q8-q15 the eight 16-byte rows currently being processed
;
; Stack: exactly one word (lr) is pushed on entry, so every exit path must
; return with "pop {pc}".

|vp9_idct32x32_1_add_neon| PROC
    push             {lr}
    pld              [r1]
    add              r3, r1, #16               ; r3 dest + 16 for second loop
    ldrsh            r0, [r0]                  ; r0 = input[0], sign-extended

    ; generate cospi_16_64 = 11585
    mov              r12, #0x2d00
    add              r12, #0x41

    ; out = dct_const_round_shift(input[0] * cospi_16_64)
    mul              r0, r0, r12               ; input[0] * cospi_16_64
    add              r0, r0, #0x2000           ; +(1 << ((DCT_CONST_BITS) - 1))
    asr              r0, r0, #14               ; >> DCT_CONST_BITS

    ; out = dct_const_round_shift(out * cospi_16_64)
    mul              r0, r0, r12               ; out * cospi_16_64
    mov              r12, r1                   ; save dest
    add              r0, r0, #0x2000           ; +(1 << ((DCT_CONST_BITS) - 1))
    asr              r0, r0, #14               ; >> DCT_CONST_BITS

    ; a1 = ROUND_POWER_OF_TWO(out, 6)
    add              r0, r0, #32               ; + (1 <<((6) - 1))
    asrs             r0, r0, #6                ; >> 6, N = sign of a1
    ; asrs updates N/Z/C but leaves V unchanged, so test the sign bit
    ; directly (pl) instead of ge, which would also consult a stale V.
    bpl              diff_positive_32_32

    ; a1 < 0: subtract |a1| from every pixel.
diff_negative_32_32
    neg              r0, r0                    ; r0 = |a1|
    usat             r0, #8, r0                ; clamp to 0..255
    vdup.u8          q0, r0
    mov              r0, #4                    ; 4 passes of 16 rows x 16 bytes

diff_negative_32_32_loop
    sub              r0, #1
    LD_16x8          r1, r2
    SUB_DIFF_16x8    q0
    ST_16x8          r12, r2

    LD_16x8          r1, r2
    SUB_DIFF_16x8    q0
    ST_16x8          r12, r2
    cmp              r0, #2                    ; two passes done: left half finished
    moveq            r1, r3                    ; restart reads at dest + 16
    moveq            r12, r3                   ; restart writes at dest + 16
    cmp              r0, #0
    bne              diff_negative_32_32_loop
    pop              {pc}                      ; balances the single push {lr}

    ; a1 >= 0: add a1 to every pixel.
diff_positive_32_32
    usat             r0, #8, r0                ; clamp to 0..255
    vdup.u8          q0, r0
    mov              r0, #4                    ; 4 passes of 16 rows x 16 bytes

diff_positive_32_32_loop
    sub              r0, #1
    LD_16x8          r1, r2
    ADD_DIFF_16x8    q0
    ST_16x8          r12, r2

    LD_16x8          r1, r2
    ADD_DIFF_16x8    q0
    ST_16x8          r12, r2
    cmp              r0, #2                    ; two passes done: left half finished
    moveq            r1, r3                    ; restart reads at dest + 16
    moveq            r12, r3                   ; restart writes at dest + 16
    cmp              r0, #0
    bne              diff_positive_32_32_loop
    pop              {pc}                      ; balances the single push {lr}

    ENDP             ; |vp9_idct32x32_1_add_neon|
|
||||
END
|
@ -299,7 +299,7 @@ prototype void vp9_idct32x32_34_add "const int16_t *input, uint8_t *dest, int de
|
||||
specialize vp9_idct32x32_34_add sse2
|
||||
|
||||
prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_idct32x32_1_add sse2 dspr2
|
||||
specialize vp9_idct32x32_1_add sse2 neon dspr2
|
||||
|
||||
prototype void vp9_iht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
|
||||
specialize vp9_iht4x4_16_add sse2 neon dspr2
|
||||
|
@ -123,6 +123,7 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_1_add_neon$(AS
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct8x8_add_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_1_add_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct16x16_add_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct32x32_1_add_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_idct32x32_add_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht4x4_add_neon$(ASM)
|
||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht8x8_add_neon$(ASM)
|
||||
|
Loading…
x
Reference in New Issue
Block a user