3273c7b679
common/arm/vpx_asm_offsets moves up a level. prepare for muxing with encoder/arm/vpx_vp8_enc_asm_offsets Change-Id: I89a04a5235447e66571995c9d9b4b6edcb038e24
321 lines
12 KiB
NASM
321 lines
12 KiB
NASM
;
|
|
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
|
;
|
|
; Use of this source code is governed by a BSD-style license
|
|
; that can be found in the LICENSE file in the root of the source
|
|
; tree. An additional intellectual property rights grant can be found
|
|
; in the file PATENTS. All contributing project authors may
|
|
; be found in the AUTHORS file in the root of the source tree.
|
|
;
|
|
|
|
|
|
EXPORT |vp8_decode_mb_tokens_v6|
|
|
|
|
AREA |.text|, CODE, READONLY ; name this block of code
|
|
|
|
INCLUDE assembly_offsets.asm
|
|
|
|
; Byte offsets of the local variables kept in the stack frame of
; vp8_decode_mb_tokens_v6. Each is used as [sp, #l_xxx] after the
; routine has lowered sp by l_stacksize.
l_qcoeff EQU 0 ; pointer to the current block's coefficients (advanced by 32 bytes per block)
|
|
l_i EQU 4 ; current block index i
|
|
l_type EQU 8 ; current type value
|
|
l_stop EQU 12 ; block index at which the current pass ends
|
|
l_c EQU 16 ; current coefficient position c within the block
|
|
l_l_ptr EQU 20 ; saved address of the "left" context byte for the block
|
|
l_a_ptr EQU 24 ; saved address of the "above" context byte for the block
|
|
l_bc EQU 28 ; pointer loaded from detok_current_bc (bool decoder state)
|
|
l_coef_ptr EQU 32 ; coef_probs pointer for the current type
|
|
l_stacksize EQU 64 ; bytes reserved on the stack; the slots above occupy only the first 36
|
|
|
|
|
|
;; constant offsets -- these should be created at build time
|
|
c_block2above_offset EQU 25 ; byte distance from the block2left[i] entry to the block2above[i] entry in the table at detok_ptr_block2leftabove
|
|
c_entropy_nodes EQU 11 ; per-context stride applied to the coef_probs pointer
|
|
c_dct_eob_token EQU 11 ; token number whose negated value in t ends the block
|
|
|
|
;-----------------------------------------------------------------------
; vp8_decode_mb_tokens_v6
;
; Walks the blocks of one macroblock, decoding coefficient tokens with
; the boolean decoder and storing the resulting values as halfwords
; through the qcoeff pointer. For every block it also records the end
; position in the eob array and updates the left/above context bytes.
;
; In:    r0 = detoken context (accessed through the detok_* offsets)
;        r1 = type; type == 1 decodes block 24 first and then restarts
;             at block 0 with type 0
; Out:   r0 = 0
; Saves: r4-r11, lr (restored on return); uses l_stacksize bytes of stack
;
; Register roles inside the loops:
;   r4 = value, r5 = count, r6 = range, r8 = buffer pointer (bool decoder)
;   r9 = detoken context
;   r7 = type on entry to BLOCK_LOOP, then c (coefficient position)
;   r11 = block index i on entry to BLOCK_LOOP (scratch afterwards)
;   r10 = coef_probs pointer on entry to BLOCK_LOOP (scratch afterwards)
;   r0 = Prob pointer, r1 = t (tree index / token), lr = tree pointer or v
;   r12 = byte pre-loaded from the buffer, r2/r3 = scratch
;-----------------------------------------------------------------------
|vp8_decode_mb_tokens_v6| PROC
    stmdb       sp!, {r4 - r11, lr}
    sub         sp, sp, #l_stacksize
    mov         r7, r1                  ; type
    mov         r9, r0                  ; detoken

    ldr         r1, [r9, #detok_current_bc]
    ldr         r0, [r9, #detok_qcoeff_start_ptr]
    mov         r11, #0                 ; i
    mov         r3, #16                 ; stop

    cmp         r7, #1                  ; type ?= 1
    addeq       r11, r11, #24           ; i = 24
    addeq       r3, r3, #8              ; stop = 24
    addeq       r0, r0, #3, 24          ; qcoeff += 768 bytes (3 ROR 24): 24 blocks * 32 bytes

    str         r0, [sp, #l_qcoeff]
    str         r11, [sp, #l_i]
    str         r7, [sp, #l_type]
    str         r3, [sp, #l_stop]
    str         r1, [sp, #l_bc]

    add         lr, r9, r7, lsl #2      ; detoken + type*4

    ldr         r8, [r1, #bool_decoder_user_buffer]

    ldr         r10, [lr, #detok_coef_probs]    ; coef_probs[type]
    ldr         r5, [r1, #bool_decoder_count]
    ldr         r6, [r1, #bool_decoder_range]
    ldr         r4, [r1, #bool_decoder_value]

    str         r10, [sp, #l_coef_ptr]

    ; Entry invariants: r11 = i, r7 = type, r10 = coef_probs for type.
BLOCK_LOOP
    ldr         r3, [r9, #detok_ptr_block2leftabove]
    ldr         r1, [r9, #detok_L]
    ldr         r2, [r9, #detok_A]
    ldrb        r12, [r3, r11]!         ; block2left[i]
    ldrb        r3, [r3, #c_block2above_offset] ; block2above[i]

    cmp         r7, #0                  ; c = !type
    moveq       r7, #1
    movne       r7, #0

    ldrb        r0, [r1, r12]!          ; *(L += block2left[i])
    ldrb        r3, [r2, r3]!           ; *(A += block2above[i])
    mov         lr, #c_entropy_nodes    ; ENTROPY_NODES = 11

    ; VP8_COMBINEENTROPYCONTEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) != 0)
    cmp         r0, #0                  ; *l ?= 0
    movne       r0, #1
    cmp         r3, #0                  ; *a ?= 0
    addne       r0, r0, #1              ; t

    str         r1, [sp, #l_l_ptr]      ; save &l
    str         r2, [sp, #l_a_ptr]      ; save &a
    smlabb      r0, r0, lr, r10         ; Prob = coef_probs + (t * ENTROPY_NODES)
    mov         r1, #0                  ; t = 0
    str         r7, [sp, #l_c]

    ;align 4
COEFF_LOOP
    ldr         r3, [r9, #detok_ptr_coef_bands_x]
    ldr         lr, [r9, #detok_coef_tree_ptr]
    ;STALL
    ldrb        r3, [r3, r7]            ; coef_bands_x[c]
    ;STALL
    ;STALL
    add         r0, r0, r3              ; Prob += coef_bands_x[c]

    ; Walk the token tree one decoded bit at a time until t <= 0.
get_token_loop
    ldrb        r2, [r0, +r1, asr #1]   ; Prob[t >> 1]
    mov         r3, r6, lsl #8          ; range << 8
    sub         r3, r3, #256            ; (range << 8) - (1 << 8)
    mov         r10, #1                 ; 1

    smlawb      r2, r3, r2, r10         ; split = 1 + (((range-1) * probability) >> 8)

    ldrb        r12, [r8]               ; load cx data byte in stall slot : r8 = bufptr
    ;++

    subs        r3, r4, r2, lsl #24     ; value - (split << 24); carry set => bit is 1
    addhs       r1, r1, #1              ; t += 1
    movhs       r4, r3                  ; value -= bigsplit (split << 24)
    subhs       r2, r6, r2              ; range -= split
    ; movlo     r6, r2                  ; range = split (r2 already holds it)

    ldrsb       r1, [lr, r1]            ; t = onyx_coef_tree_ptr[t]

    ; NORMALIZE: r2 holds the new range in both cases
    clz         r3, r2                  ; vp8dx_bitreader_norm[range] + 24
    sub         r3, r3, #24             ; vp8dx_bitreader_norm[range]
    subs        r5, r5, r3              ; count -= shift
    mov         r6, r2, lsl r3          ; range <<= shift
    mov         r4, r4, lsl r3          ; value <<= shift

    ; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
    addle       r5, r5, #8              ; count += 8
    rsble       r3, r5, #24             ; 24 - count
    addle       r8, r8, #1              ; bufptr++
    orrle       r4, r4, r12, lsl r3     ; value |= byte << (24 - count)

    cmp         r1, #0                  ; t ?= 0
    bgt         get_token_loop          ; while (t > 0)

    cmn         r1, #c_dct_eob_token    ; if(t == -DCT_EOB_TOKEN)
    beq         END_OF_BLOCK            ; break

    rsb         lr, r1, #0              ; v = -t;

    cmp         lr, #4                  ; if(v > FOUR_TOKEN)
    ble         SKIP_EXTRABITS

    ldr         r3, [r9, #detok_teb_base_ptr]
    mov         r11, #1                 ; constant 1: used in split = 1 + ... and in v += 1 << bits_count
    add         r7, r3, lr, lsl #4      ; teb_ptr = detok_teb_base_ptr + (v << 4)

    ldrsh       lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
    ldrsh       r0, [r7, #tokenextrabits_length]  ; bits_count = teb_ptr->Length

extrabits_loop
    add         r3, r0, r7              ; teb_ptr + bits_count

    ; NOTE(review): the hard-coded +4 is presumably the offset of the
    ; probability array inside the extra-bits entry; confirm and replace
    ; with a generated offset.
    ldrb        r2, [r3, #4]            ; probability for this bit
    mov         r3, r6, lsl #8          ; range << 8
    sub         r3, r3, #256            ; (range << 8) - (1 << 8)

    smlawb      r2, r3, r2, r11         ; split = 1 + (((range-1) * probability) >> 8)

    ldrb        r12, [r8]               ; *bufptr
    ;++

    subs        r10, r4, r2, lsl #24    ; value - (split << 24)
    movhs       r4, r10                 ; value = value - (split << 24)
    subhs       r2, r6, r2              ; range = range - split
    addhs       lr, lr, r11, lsl r0     ; v += ((UINT16)1 << bits_count)

    ; NORMALIZE
    clz         r3, r2                  ; leading zeros of the new range
    sub         r3, r3, #24             ; don't count first 3 bytes
    subs        r5, r5, r3              ; count -= shift
    mov         r6, r2, lsl r3          ; range = range << shift
    mov         r4, r4, lsl r3          ; value <<= shift

    addle       r5, r5, #8              ; count += BR_COUNT
    addle       r8, r8, #1              ; bufptr++
    rsble       r3, r5, #24             ; 24 - count
    orrle       r4, r4, r12, lsl r3     ; value |= byte << (24 - count)

    subs        r0, r0, #1              ; bits_count --
    bpl         extrabits_loop

SKIP_EXTRABITS
    ldr         r11, [sp, #l_qcoeff]
    ldr         r0, [sp, #l_coef_ptr]   ; Prob = coef_probs

    cmp         r1, #0                  ; check for nonzero token - if (t)
    beq         SKIP_EOB_CHECK          ; if t is zero, we will skip the eob table check

    ; Decode the sign with an even split.
    add         r3, r6, #1              ; range + 1
    mov         r2, r3, lsr #1          ; split = (range + 1) >> 1

    subs        r3, r4, r2, lsl #24     ; value - (split << 24)
    movhs       r4, r3                  ; value -= (split << 24)
    subhs       r2, r6, r2              ; range -= split
    mvnhs       r3, lr                  ; ~v
    addhs       lr, r3, #1              ; v = (v ^ -1) + 1, i.e. v = -v

    ; NORMALIZE
    clz         r3, r2                  ; leading zeros of the new range
    sub         r3, r3, #24             ; shift
    subs        r5, r5, r3              ; count -= shift
    mov         r6, r2, lsl r3          ; range <<= shift
    mov         r4, r4, lsl r3          ; value <<= shift
    ldrleb      r2, [r8], #1            ; *(bufptr++)
    addle       r5, r5, #8              ; count += 8
    rsble       r3, r5, #24             ; 24 - count
    orrle       r4, r4, r2, lsl r3      ; value |= byte << (24 - count)

    add         r0, r0, #c_entropy_nodes ; Prob += ENTROPY_NODES

    cmn         r1, #1                  ; t < -ONE_TOKEN

    addlt       r0, r0, #c_entropy_nodes ; Prob += ENTROPY_NODES

    mvn         r1, #1                  ; t = -2 (~1); becomes 0 after the += 2 below

SKIP_EOB_CHECK
    ldr         r7, [sp, #l_c]          ; c
    ldr         r3, [r9, #detok_scan]
    add         r1, r1, #2              ; t += 2
    cmp         r7, #15                 ; compare before c is incremented; flags consumed by blt below

    ldr         r3, [r3, +r7, lsl #2]   ; scan[c] (uses the pre-increment c)
    add         r7, r7, #1              ; c++
    add         r3, r11, r3, lsl #1     ; &qcoeff[scan[c]] (halfword entries)

    str         r7, [sp, #l_c]          ; store c
    strh        lr, [r3]                ; qcoef_ptr[scan[c]] = v

    blt         COEFF_LOOP

    sub         r7, r7, #1              ; if(t != -DCT_EOB_TOKEN) --c

END_OF_BLOCK
    ldr         r3, [sp, #l_type]       ; type
    ldr         r10, [sp, #l_coef_ptr]  ; coef_ptr
    ldr         r0, [sp, #l_qcoeff]     ; qcoeff
    ldr         r11, [sp, #l_i]         ; i
    ldr         r12, [sp, #l_stop]      ; stop

    cmp         r3, #0                  ; type ?= 0
    moveq       r1, #1
    movne       r1, #0
    add         r3, r11, r9             ; detok + i

    cmp         r7, r1                  ; c ?= !type
    strb        r7, [r3, #detok_eob]    ; eob[i] = c

    ldr         r7, [sp, #l_l_ptr]      ; l
    ldr         r2, [sp, #l_a_ptr]      ; a
    movne       r3, #1                  ; t = (c != !type)
    moveq       r3, #0

    add         r0, r0, #32             ; qcoeff += 32 bytes (16 halfwords)
    add         r11, r11, #1            ; i++
    strb        r3, [r7]                ; *l = t
    strb        r3, [r2]                ; *a = t
    str         r0, [sp, #l_qcoeff]     ; qcoeff
    str         r11, [sp, #l_i]         ; i

    cmp         r11, r12                ; i < stop
    ldr         r7, [sp, #l_type]       ; type

    blt         BLOCK_LOOP

    cmp         r11, #25                ; i ?= 25
    bne         ln2_decode_mb_to

    ; Block 24 (type 1) is done: restart at block 0 with type 0.
    ldr         r12, [r9, #detok_qcoeff_start_ptr]
    ldr         r10, [r9, #detok_coef_probs]
    mov         r7, #0                  ; type = 0
    mov         r11, #0                 ; i = 0; BLOCK_LOOP indexes the block2left/above table with r11
    mov         r3, #16                 ; stop = 16
    str         r12, [sp, #l_qcoeff]    ; qcoeff_ptr = qcoeff_start_ptr
    str         r7, [sp, #l_i]
    str         r7, [sp, #l_type]
    str         r3, [sp, #l_stop]

    str         r10, [sp, #l_coef_ptr]  ; coef_probs = coef_probs[type=0]

    b           BLOCK_LOOP

ln2_decode_mb_to
    cmp         r11, #16                ; i ?= 16
    bne         ln1_decode_mb_to

    ; Blocks 0-15 are done: continue with blocks 16-23 as type 2.
    mov         r10, #detok_coef_probs
    add         r10, r10, #2*4          ; offset of coef_probs[2]
    ldr         r10, [r9, r10]          ; coef_probs[2]

    mov         r7, #2                  ; type = 2
    mov         r3, #24                 ; stop = 24

    str         r7, [sp, #l_type]
    str         r3, [sp, #l_stop]

    str         r10, [sp, #l_coef_ptr]  ; coef_probs = coef_probs[type]
    b           BLOCK_LOOP

    ; All passes done: write the bool decoder state back and return 0.
ln1_decode_mb_to
    ldr         r2, [sp, #l_bc]
    mov         r0, #0
    nop

    str         r8, [r2, #bool_decoder_user_buffer]
    str         r5, [r2, #bool_decoder_count]
    str         r4, [r2, #bool_decoder_value]
    str         r6, [r2, #bool_decoder_range]

    add         sp, sp, #l_stacksize
    ldmia       sp!, {r4 - r11, pc}

    ENDP        ; |vp8_decode_mb_tokens_v6|
|
|
|
|
END
|