281 lines
6.9 KiB
NASM
281 lines
6.9 KiB
NASM
|
;
|
||
|
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||
|
;
|
||
|
; Use of this source code is governed by a BSD-style license and patent
|
||
|
; grant that can be found in the LICENSE file in the root of the source
|
||
|
; tree. All contributing project authors may be found in the AUTHORS
|
||
|
; file in the root of the source tree.
|
||
|
;
|
||
|
|
||
|
|
||
|
EXPORT |vp8_recon_b_armv6|
|
||
|
EXPORT |vp8_recon2b_armv6|
|
||
|
EXPORT |vp8_recon4b_armv6|
|
||
|
|
||
|
AREA |.text|, CODE, READONLY ; name this block of code
|
||
|
prd RN r0
|
||
|
dif RN r1
|
||
|
dst RN r2
|
||
|
stride RN r3
|
||
|
|
||
|
;void recon_b(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride)
|
||
|
; R0 char* pred_ptr
|
||
|
; R1 short * dif_ptr
|
||
|
; R2 char * dst_ptr
|
||
|
; R3 int stride
|
||
|
|
||
|
; Description:
|
||
|
; Loop through the block adding the Pred and Diff together. Clamp and then
|
||
|
; store back into the Dst.
|
||
|
|
||
|
; Restrictions :
|
||
|
; all buffers are expected to be 4 byte aligned coming in and
|
||
|
; going out.
|
||
|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||
|
;
|
||
|
;
|
||
|
;
|
||
|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||
|
|vp8_recon_b_armv6| PROC
|
||
|
stmdb sp!, {r4 - r9, lr}
|
||
|
|
||
|
;0, 1, 2, 3
|
||
|
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
|
||
|
ldr r6, [dif, #0] ; 1 | 0
|
||
|
ldr r7, [dif, #4] ; 3 | 2
|
||
|
|
||
|
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
|
||
|
pkhtb r9, r7, r6, asr #16 ; 3 | 1
|
||
|
|
||
|
uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0
|
||
|
uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1
|
||
|
|
||
|
usat16 r8, #8, r8
|
||
|
usat16 r9, #8, r9
|
||
|
add dif, dif, #32
|
||
|
orr r8, r8, r9, lsl #8
|
||
|
|
||
|
str r8, [dst], stride
|
||
|
|
||
|
;0, 1, 2, 3
|
||
|
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
|
||
|
;; ldr r6, [dif, #8] ; 1 | 0
|
||
|
;; ldr r7, [dif, #12] ; 3 | 2
|
||
|
ldr r6, [dif, #0] ; 1 | 0
|
||
|
ldr r7, [dif, #4] ; 3 | 2
|
||
|
|
||
|
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
|
||
|
pkhtb r9, r7, r6, asr #16 ; 3 | 1
|
||
|
|
||
|
uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0
|
||
|
uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1
|
||
|
|
||
|
usat16 r8, #8, r8
|
||
|
usat16 r9, #8, r9
|
||
|
add dif, dif, #32
|
||
|
orr r8, r8, r9, lsl #8
|
||
|
|
||
|
str r8, [dst], stride
|
||
|
|
||
|
;0, 1, 2, 3
|
||
|
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
|
||
|
;; ldr r6, [dif, #16] ; 1 | 0
|
||
|
;; ldr r7, [dif, #20] ; 3 | 2
|
||
|
ldr r6, [dif, #0] ; 1 | 0
|
||
|
ldr r7, [dif, #4] ; 3 | 2
|
||
|
|
||
|
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
|
||
|
pkhtb r9, r7, r6, asr #16 ; 3 | 1
|
||
|
|
||
|
uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0
|
||
|
uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1
|
||
|
|
||
|
usat16 r8, #8, r8
|
||
|
usat16 r9, #8, r9
|
||
|
add dif, dif, #32
|
||
|
orr r8, r8, r9, lsl #8
|
||
|
|
||
|
str r8, [dst], stride
|
||
|
|
||
|
;0, 1, 2, 3
|
||
|
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
|
||
|
;; ldr r6, [dif, #24] ; 1 | 0
|
||
|
;; ldr r7, [dif, #28] ; 3 | 2
|
||
|
ldr r6, [dif, #0] ; 1 | 0
|
||
|
ldr r7, [dif, #4] ; 3 | 2
|
||
|
|
||
|
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
|
||
|
pkhtb r9, r7, r6, asr #16 ; 3 | 1
|
||
|
|
||
|
uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0
|
||
|
uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1
|
||
|
|
||
|
usat16 r8, #8, r8
|
||
|
usat16 r9, #8, r9
|
||
|
orr r8, r8, r9, lsl #8
|
||
|
|
||
|
str r8, [dst], stride
|
||
|
|
||
|
ldmia sp!, {r4 - r9, pc}
|
||
|
|
||
|
ENDP ; |recon_b|
|
||
|
|
||
|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||
|
;
|
||
|
;
|
||
|
;
|
||
|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||
|
; R0 char *pred_ptr
|
||
|
; R1 short *dif_ptr
|
||
|
; R2 char *dst_ptr
|
||
|
; R3 int stride
|
||
|
|vp8_recon4b_armv6| PROC
|
||
|
stmdb sp!, {r4 - r9, lr}
|
||
|
|
||
|
mov lr, #4
|
||
|
|
||
|
recon4b_loop
|
||
|
;0, 1, 2, 3
|
||
|
ldr r4, [prd], #4 ; 3 | 2 | 1 | 0
|
||
|
ldr r6, [dif, #0] ; 1 | 0
|
||
|
ldr r7, [dif, #4] ; 3 | 2
|
||
|
|
||
|
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
|
||
|
pkhtb r9, r7, r6, asr #16 ; 3 | 1
|
||
|
|
||
|
uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0
|
||
|
uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1
|
||
|
|
||
|
usat16 r8, #8, r8
|
||
|
usat16 r9, #8, r9
|
||
|
orr r8, r8, r9, lsl #8
|
||
|
|
||
|
str r8, [dst]
|
||
|
|
||
|
;4, 5, 6, 7
|
||
|
ldr r4, [prd], #4
|
||
|
;; ldr r6, [dif, #32]
|
||
|
;; ldr r7, [dif, #36]
|
||
|
ldr r6, [dif, #8]
|
||
|
ldr r7, [dif, #12]
|
||
|
|
||
|
pkhbt r8, r6, r7, lsl #16
|
||
|
pkhtb r9, r7, r6, asr #16
|
||
|
|
||
|
uxtab16 r8, r8, r4
|
||
|
uxtab16 r9, r9, r4, ror #8
|
||
|
usat16 r8, #8, r8
|
||
|
usat16 r9, #8, r9
|
||
|
orr r8, r8, r9, lsl #8
|
||
|
|
||
|
str r8, [dst, #4]
|
||
|
|
||
|
;8, 9, 10, 11
|
||
|
ldr r4, [prd], #4
|
||
|
;; ldr r6, [dif, #64]
|
||
|
;; ldr r7, [dif, #68]
|
||
|
ldr r6, [dif, #16]
|
||
|
ldr r7, [dif, #20]
|
||
|
|
||
|
pkhbt r8, r6, r7, lsl #16
|
||
|
pkhtb r9, r7, r6, asr #16
|
||
|
|
||
|
uxtab16 r8, r8, r4
|
||
|
uxtab16 r9, r9, r4, ror #8
|
||
|
usat16 r8, #8, r8
|
||
|
usat16 r9, #8, r9
|
||
|
orr r8, r8, r9, lsl #8
|
||
|
|
||
|
str r8, [dst, #8]
|
||
|
|
||
|
;12, 13, 14, 15
|
||
|
ldr r4, [prd], #4
|
||
|
;; ldr r6, [dif, #96]
|
||
|
;; ldr r7, [dif, #100]
|
||
|
ldr r6, [dif, #24]
|
||
|
ldr r7, [dif, #28]
|
||
|
|
||
|
pkhbt r8, r6, r7, lsl #16
|
||
|
pkhtb r9, r7, r6, asr #16
|
||
|
|
||
|
uxtab16 r8, r8, r4
|
||
|
uxtab16 r9, r9, r4, ror #8
|
||
|
usat16 r8, #8, r8
|
||
|
usat16 r9, #8, r9
|
||
|
orr r8, r8, r9, lsl #8
|
||
|
|
||
|
str r8, [dst, #12]
|
||
|
|
||
|
add dst, dst, stride
|
||
|
;; add dif, dif, #8
|
||
|
add dif, dif, #32
|
||
|
|
||
|
subs lr, lr, #1
|
||
|
bne recon4b_loop
|
||
|
|
||
|
ldmia sp!, {r4 - r9, pc}
|
||
|
|
||
|
ENDP ; |Recon4B|
|
||
|
|
||
|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||
|
;
|
||
|
;
|
||
|
;
|
||
|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||
|
; R0 char *pred_ptr
|
||
|
; R1 short *dif_ptr
|
||
|
; R2 char *dst_ptr
|
||
|
; R3 int stride
|
||
|
|vp8_recon2b_armv6| PROC
|
||
|
stmdb sp!, {r4 - r9, lr}
|
||
|
|
||
|
mov lr, #4
|
||
|
|
||
|
recon2b_loop
|
||
|
;0, 1, 2, 3
|
||
|
ldr r4, [prd], #4
|
||
|
ldr r6, [dif, #0]
|
||
|
ldr r7, [dif, #4]
|
||
|
|
||
|
pkhbt r8, r6, r7, lsl #16
|
||
|
pkhtb r9, r7, r6, asr #16
|
||
|
|
||
|
uxtab16 r8, r8, r4
|
||
|
uxtab16 r9, r9, r4, ror #8
|
||
|
usat16 r8, #8, r8
|
||
|
usat16 r9, #8, r9
|
||
|
orr r8, r8, r9, lsl #8
|
||
|
|
||
|
str r8, [dst]
|
||
|
|
||
|
;4, 5, 6, 7
|
||
|
ldr r4, [prd], #4
|
||
|
;; ldr r6, [dif, #32]
|
||
|
;; ldr r7, [dif, #36]
|
||
|
ldr r6, [dif, #8]
|
||
|
ldr r7, [dif, #12]
|
||
|
|
||
|
pkhbt r8, r6, r7, lsl #16
|
||
|
pkhtb r9, r7, r6, asr #16
|
||
|
|
||
|
uxtab16 r8, r8, r4
|
||
|
uxtab16 r9, r9, r4, ror #8
|
||
|
usat16 r8, #8, r8
|
||
|
usat16 r9, #8, r9
|
||
|
orr r8, r8, r9, lsl #8
|
||
|
|
||
|
str r8, [dst, #4]
|
||
|
|
||
|
add dst, dst, stride
|
||
|
;; add dif, dif, #8
|
||
|
add dif, dif, #16
|
||
|
|
||
|
subs lr, lr, #1
|
||
|
bne recon2b_loop
|
||
|
|
||
|
ldmia sp!, {r4 - r9, pc}
|
||
|
|
||
|
ENDP ; |Recon2B|
|
||
|
|
||
|
END
|