160 lines
4.2 KiB
NASM
160 lines
4.2 KiB
NASM
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
|
|
|
|
|
|
EXPORT |vp8_decode_value_neon|
|
|
EXPORT |vp8dx_start_decode_neon|
|
|
EXPORT |vp8dx_stop_decode_neon|
|
|
EXPORT |vp8dx_decode_bool_neon|
|
|
|
|
ARM
|
|
REQUIRE8
|
|
PRESERVE8
|
|
|
|
INCLUDE vpx_asm_offsets.asm
|
|
|
|
AREA |.text|, CODE, READONLY ; name this block of code
|
|
|
|
;-----------------------------------------------------------------------
; int vp8_decode_value_neon(BOOL_DECODER *br, int bits)
;
; Decodes `bits` booleans at probability 0x80 and packs them into an
; integer, first decoded bit in the most significant position.
;
; C equivalent:
;     int z = 0;
;     int bit;
;     for (bit = bits - 1; bit >= 0; bit--)
;     {
;         z |= (vp8dx_decode_bool(br, 0x80) << bit);
;     }
;     return z;
;
; In:    r0 = br, r1 = bits
; Out:   r0 = z
; Live:  r4 = br, r5 = bit, r6 = z (saved on entry, restored on exit)
; Stack: 16 bytes (r4-r6, lr)
;-----------------------------------------------------------------------
|vp8_decode_value_neon| PROC
    stmfd   sp!, {r4 - r6, lr}

    mov     r6, #0                          ; z = 0
    mov     r5, r1                          ; r5 = bits
    mov     r4, r0                          ; keep br across the calls

    subs    r5, r5, #1                      ; bit = bits - 1
    bmi     decode_value_done               ; negative: no bits to read, return 0

decode_value_next_bit
    mov     r0, r4                          ; arg0 = br
    mov     r1, #0x80                       ; arg1 = probability
    bl      vp8dx_decode_bool_neon_internal ; needed for conversion to s file
    orr     r6, r6, r0, lsl r5              ; z |= decoded_bit << bit
    subs    r5, r5, #1                      ; bit--
    bpl     decode_value_next_bit           ; loop while bit >= 0

decode_value_done
    mov     r0, r6                          ; return z
    ldmfd   sp!, {r4 - r6, pc}
    ENDP    ; |vp8_decode_value_neon|
|
|
|
|
|
|
;-----------------------------------------------------------------------
; void vp8dx_start_decode_neon(BOOL_DECODER *br, unsigned char *source)
;
; Initialises the boolean decoder state in *br and primes `value` with
; the first four bytes of `source`, first byte in the top 8 bits.
;
;     br->lowvalue = 0      br->range = 255     br->buffer = source
;     br->count    = 8      br->pos   = 4
;     br->value    = source[0] << 24 | source[1] << 16
;                  | source[2] << 8  | source[3]
;
; In:    r0 = br, r1 = source
; Out:   none (r0 still holds br)
; Clobb: r1, r2, r3, r12
; Stack: none. This is a leaf routine and only caller-saved scratch
;        registers are used, so nothing is pushed and sp is untouched.
;-----------------------------------------------------------------------
|vp8dx_start_decode_neon| PROC
    mov     r2, #0
    mov     r3, #255

    str     r2, [r0, #bool_decoder_lowvalue]    ; lowvalue = 0
    str     r3, [r0, #bool_decoder_range]       ; range    = 255
    str     r1, [r0, #bool_decoder_buffer]      ; buffer   = source

    mov     r3, #8
    mov     r2, #4
    str     r3, [r0, #bool_decoder_count]       ; count = 8
    str     r2, [r0, #bool_decoder_pos]         ; pos   = 4 (four bytes consumed below)

    ldrb    r2, [r1, #3]
    ldrb    r3, [r1, #2]
    ldrb    r12, [r1, #1]
    ldrb    r1, [r1]                            ; source pointer no longer needed

    orr     r2, r2, r3, lsl #8
    orr     r2, r2, r12, lsl #16
    orr     r2, r2, r1, lsl #24                 ; source[0] ends up in bits 31..24

    str     r2, [r0, #bool_decoder_value]

    bx      lr
    ENDP    ; |vp8dx_start_decode_neon|
|
|
|
|
|
|
;-----------------------------------------------------------------------
; void vp8dx_stop_decode_neon(BOOL_DECODER *bc)
;
; No work is done here; the routine returns immediately.
;
; In:    r0 = bc (unused)
; Out:   none
; Clobb: none
;
; Returns with BX so that the caller's instruction set state is restored
; from bit 0 of lr regardless of architecture version.
;-----------------------------------------------------------------------
|vp8dx_stop_decode_neon| PROC
    bx      lr
    ENDP    ; |vp8dx_stop_decode_neon|
|
|
|
|
|
|
;-----------------------------------------------------------------------
; int vp8dx_decode_bool_neon(BOOL_DECODER *br, int probability)
;
; Decodes one boolean from the arithmetic-coded stream:
;
;     split    = 1 + (((br->range - 1) * probability) >> 8)
;     bigsplit = split << 24
;     if (value >= bigsplit) { range = br->range - split;
;                              value -= bigsplit; bit = 1; }
;     else                   { range = split;     bit = 0; }
;     if (range < 0x80)      { shift range/value left until range has
;                              bit 7 set, and pull in one more byte from
;                              the buffer when count drops to <= 0 }
;
; In:    r0 = br, r1 = probability (low 16 bits are used by SMLAWB)
; Out:   r0 = bit (0 or 1)
; Clobb: r1, r2, r3, r12, flags
; Stack: 12 bytes (r4, r5, lr)
;
; Register roles:
;     r2  range
;     r3  value
;     r4  split; on the renormalise path, count
;     r5  value - bigsplit; on the renormalise path, pos
;     r1  probability; later the byte read pointer, then the left-shift
;         applied to the new byte
;     r12 constant 1; later the shift amount, then the byte fetched
;     lr  br (r0 is reused for the result)
;
; LDRD and STRD doubleword data transfers must be eight-byte aligned. Use
; ALIGN 8 before memory allocation.
; NOTE(review): the STRD/LDRD pairs assume `value` is stored directly after
; `range` and `pos` directly after `count` in BOOL_DECODER - confirm
; against vpx_asm_offsets.asm.
;
; vp8dx_decode_bool_neon_internal is a second entry label at the same
; address, used by the BL in vp8_decode_value_neon.
;-----------------------------------------------------------------------
|vp8dx_decode_bool_neon| PROC
vp8dx_decode_bool_neon_internal
    stmfd   sp!, {r4 - r5, lr}

    ; Two single loads; the original author noted that
    ; "ldrd r2, r3, [r0, #bool_decoder_range]" costs 2 cycles.
    ldr     r2, [r0, #bool_decoder_range]       ; r2 = range
    ldr     r3, [r0, #bool_decoder_value]       ; r3 = value

    mov     r4, r2, lsl #8
    sub     r4, r4, #256                        ; r4 = (range - 1) << 8
    mov     r12, #1

    ; r4 = ((r4 * low16(r1)) >> 16) + r12
    smlawb  r4, r4, r1, r12                     ; split = 1 + (((range-1) * probability) >> 8)

    mov     lr, r0                              ; lr = br, frees r0 for the result
    mov     r0, #0                              ; bit = 0

    subs    r5, r3, r4, lsl #24                 ; r5 = value - bigsplit, HS if value >= bigsplit

    subhs   r2, r2, r4                          ; range = br->range - split
    movlo   r2, r4                              ; range = split
    movhs   r0, #1                              ; bit = 1
    movhs   r3, r5                              ; value = value - bigsplit

    cmp     r2, #0x80
    blt     decode_bool_renorm                  ; range lost bit 7: renormalise

    strd    r2, r3, [lr, #bool_decoder_range]   ; write back range, value
    ldmfd   sp!, {r4 - r5, pc}

decode_bool_renorm
    ldrd    r4, r5, [lr, #bool_decoder_count]   ; r4 = count, r5 = pos
    ldr     r1, [lr, #bool_decoder_buffer]      ; r1 = buffer

    clz     r12, r2
    add     r1, r1, r5                          ; r1 = &buffer[pos]

    sub     r12, r12, #24                       ; shift = clz(range) - 24 (moves top set bit to bit 7)
    subs    r4, r4, r12                         ; count -= shift, LE if count <= 0
    mov     r2, r2, lsl r12                     ; range <<= shift
    mov     r3, r3, lsl r12                     ; value <<= shift

    ; Everything conditional on LE below uses the flags from the SUBS above;
    ; none of the intervening instructions update them.
    addle   r4, r4, #8                          ; count += 8
    ldrleb  r12, [r1], #1                       ; r12 = buffer[pos]

    rsble   r1, r4, #8                          ; r1 = 8 - count = -(count before the += 8)
    addle   r5, r5, #1                          ; pos++
    orrle   r3, r3, r12, lsl r1                 ; value |= buffer[pos] << (-count)

    strd    r2, r3, [lr, #bool_decoder_range]   ; write back range, value
    strd    r4, r5, [lr, #bool_decoder_count]   ; write back count, pos

    ldmfd   sp!, {r4 - r5, pc}
    ENDP    ; |vp8dx_decode_bool_neon|
|
|
|
|
END
|