vpx/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
Johann 56f5a9a060 update arm idct functions
Jeff Muizelaar posted some changes to the idct/reconstruction c code.
This is the equivalent update for the arm assembly.

This shows a good boost on v6, and a minor boost on neon.
Here are some numbers for highway in qcif, 2641 frames:
HEAD neon: ~161 fps
new neon:  ~162 fps
HEAD v6:   ~102 fps
new v6:    ~106 fps

The following functions have been updated for armv6 and neon:
vp8_dc_only_idct_add
vp8_dequant_idct_add
vp8_dequant_dc_idct_add

Conflicts:

	vp8/decoder/arm/armv6/dequantdcidct_v6.asm
	vp8/decoder/arm/armv6/dequantidct_v6.asm

Resolved by removing these files. When I rewrote the functions, I also
moved the files to dequant_dc_idct_v6.asm/dequant_idct_v6.asm

Change-Id: Ie3300df824d52474eca1a5134cf22d8b7809a5d4
2010-07-26 08:55:19 -04:00

68 lines
1.9 KiB
NASM

;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
EXPORT |vp8_dc_only_idct_add_v6|
AREA |.text|, CODE, READONLY
;void vp8_dc_only_idct_add_v6(short input_dc, unsigned char *pred_ptr,
; unsigned char *dst_ptr, int pitch, int stride)
; r0 input_dc
; r1 pred_ptr
; r2 dest_ptr
; r3 pitch
; sp stride
|vp8_dc_only_idct_add_v6| PROC
stmdb sp!, {r4 - r7, lr}
add r0, r0, #4 ; input_dc += 4
ldr r12, c0x0000FFFF
ldr r4, [r1], r3
ldr r6, [r1], r3
and r0, r12, r0, asr #3 ; input_dc >> 3 + mask
ldr lr, [sp, #20]
orr r0, r0, r0, lsl #16 ; a1 | a1
uxtab16 r5, r0, r4 ; a1+2 | a1+0
uxtab16 r4, r0, r4, ror #8 ; a1+3 | a1+1
uxtab16 r7, r0, r6
uxtab16 r6, r0, r6, ror #8
usat16 r5, #8, r5
usat16 r4, #8, r4
usat16 r7, #8, r7
usat16 r6, #8, r6
orr r5, r5, r4, lsl #8
orr r7, r7, r6, lsl #8
ldr r4, [r1], r3
ldr r6, [r1]
str r5, [r2], lr
str r7, [r2], lr
uxtab16 r5, r0, r4
uxtab16 r4, r0, r4, ror #8
uxtab16 r7, r0, r6
uxtab16 r6, r0, r6, ror #8
usat16 r5, #8, r5
usat16 r4, #8, r4
usat16 r7, #8, r7
usat16 r6, #8, r6
orr r5, r5, r4, lsl #8
orr r7, r7, r6, lsl #8
str r5, [r2], lr
str r7, [r2]
ldmia sp!, {r4 - r7, pc}
ENDP ; |vp8_dc_only_idct_add_v6|
; Constant Pool
c0x0000FFFF DCD 0x0000FFFF
END