update arm idct functions
Jeff Muizelaar posted some changes to the idct/reconstruction c code. This is the equivalent update for the arm assembly. This shows a good boost on v6, and a minor boost on neon. Here are some numbers for highway in qcif, 2641 frames: HEAD neon: ~161 fps new neon: ~162 fps HEAD v6: ~102 fps new v6: ~106 fps The following functions have been updated for armv6 and neon: vp8_dc_only_idct_add vp8_dequant_idct_add vp8_dequant_dc_idct_add Conflicts: vp8/decoder/arm/armv6/dequantdcidct_v6.asm vp8/decoder/arm/armv6/dequantidct_v6.asm Resolved by removing these files. When I rewrote the functions, I also moved the files to dequant_dc_idct_v6.asm/dequant_idct_v6.asm Change-Id: Ie3300df824d52474eca1a5134cf22d8b7809a5d4
This commit is contained in:
		
							
								
								
									
										67
									
								
								vp8/common/arm/armv6/dc_only_idct_add_v6.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								vp8/common/arm/armv6/dc_only_idct_add_v6.asm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,67 @@
 | 
			
		||||
;
 | 
			
		||||
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 | 
			
		||||
;
 | 
			
		||||
;  Use of this source code is governed by a BSD-style license and patent
 | 
			
		||||
;  grant that can be found in the LICENSE file in the root of the source
 | 
			
		||||
;  tree. All contributing project authors may be found in the AUTHORS
 | 
			
		||||
;  file in the root of the source tree.
 | 
			
		||||
;
 | 
			
		||||
 | 
			
		||||
    EXPORT  |vp8_dc_only_idct_add_v6|
 | 
			
		||||
 | 
			
		||||
    AREA    |.text|, CODE, READONLY
 | 
			
		||||
 | 
			
		||||
;void vp8_dc_only_idct_add_v6(short input_dc, unsigned char *pred_ptr,
 | 
			
		||||
;                             unsigned char *dst_ptr, int pitch, int stride)
 | 
			
		||||
; r0  input_dc
 | 
			
		||||
; r1  pred_ptr
 | 
			
		||||
; r2  dst_ptr
 | 
			
		||||
; r3  pitch
 | 
			
		||||
; sp  stride
 | 
			
		||||
 | 
			
		||||
; Adds the rounded DC term a1 = (input_dc + 4) >> 3 to every pixel of a 4x4
; predictor block and stores the sums, clamped to 0..255, through r2.
; Predictor rows are read r3 (pitch) bytes apart; output rows are written
; `stride` bytes apart, where stride is the fifth argument, taken from the stack.
; Each row is handled as one 32-bit word, i.e. four pixels at a time.
|vp8_dc_only_idct_add_v6| PROC
 | 
			
		||||
    stmdb       sp!, {r4 - r7, lr}        ; 5 registers = 20 bytes pushed
 | 
			
		||||
 | 
			
		||||
    add         r0, r0, #4                ; input_dc += 4
 | 
			
		||||
    ldr         r12, c0x0000FFFF          ; halfword mask
 | 
			
		||||
    ldr         r4, [r1], r3              ; row 0 of predictor (4 bytes), then r1 += pitch
 | 
			
		||||
    ldr         r6, [r1], r3              ; row 1 of predictor, then r1 += pitch
 | 
			
		||||
    and         r0, r12, r0, asr #3       ; a1 = ((input_dc + 4) >> 3) & 0xFFFF
 | 
			
		||||
    ldr         lr, [sp, #20]             ; stride: first stack argument, above the 20 saved bytes
 | 
			
		||||
    orr         r0, r0, r0, lsl #16       ; a1 | a1
 | 
			
		||||
 | 
			
		||||
    uxtab16     r5, r0, r4                ; a1+2 | a1+0
 | 
			
		||||
    uxtab16     r4, r0, r4, ror #8        ; a1+3 | a1+1
 | 
			
		||||
    uxtab16     r7, r0, r6                ; same for row 1: bytes 2 and 0
 | 
			
		||||
    uxtab16     r6, r0, r6, ror #8        ; row 1: bytes 3 and 1
 | 
			
		||||
    usat16      r5, #8, r5                ; clamp each signed halfword to 0..255
 | 
			
		||||
    usat16      r4, #8, r4
 | 
			
		||||
    usat16      r7, #8, r7
 | 
			
		||||
    usat16      r6, #8, r6
 | 
			
		||||
    orr         r5, r5, r4, lsl #8        ; re-interleave row 0 into 4 packed bytes
 | 
			
		||||
    orr         r7, r7, r6, lsl #8        ; re-interleave row 1 into 4 packed bytes
 | 
			
		||||
    ldr         r4, [r1], r3              ; row 2 of predictor, then r1 += pitch
 | 
			
		||||
    ldr         r6, [r1]                  ; row 3 of predictor
 | 
			
		||||
    str         r5, [r2], lr              ; store row 0, then r2 += stride
 | 
			
		||||
    str         r7, [r2], lr              ; store row 1, then r2 += stride
 | 
			
		||||
 | 
			
		||||
    uxtab16     r5, r0, r4                ; rows 2 and 3: same add / clamp / pack sequence
 | 
			
		||||
    uxtab16     r4, r0, r4, ror #8
 | 
			
		||||
    uxtab16     r7, r0, r6
 | 
			
		||||
    uxtab16     r6, r0, r6, ror #8
 | 
			
		||||
    usat16      r5, #8, r5
 | 
			
		||||
    usat16      r4, #8, r4
 | 
			
		||||
    usat16      r7, #8, r7
 | 
			
		||||
    usat16      r6, #8, r6
 | 
			
		||||
    orr         r5, r5, r4, lsl #8
 | 
			
		||||
    orr         r7, r7, r6, lsl #8
 | 
			
		||||
    str         r5, [r2], lr              ; store row 2, then r2 += stride
 | 
			
		||||
    str         r7, [r2]                  ; store row 3
 | 
			
		||||
 | 
			
		||||
    ldmia       sp!, {r4 - r7, pc}        ; restore and return (saved lr popped into pc)
 | 
			
		||||
 | 
			
		||||
    ENDP  ; |vp8_dc_only_idct_add_v6|
 | 
			
		||||
 | 
			
		||||
; Constant Pool
 | 
			
		||||
c0x0000FFFF DCD 0x0000FFFF
 | 
			
		||||
    END
 | 
			
		||||
@@ -15,8 +15,6 @@
 | 
			
		||||
    EXPORT  |vp8_short_idct4x4llm_v6_scott|
 | 
			
		||||
    EXPORT  |vp8_short_idct4x4llm_v6_dual|
 | 
			
		||||
 | 
			
		||||
    EXPORT  |vp8_dc_only_idct_armv6|
 | 
			
		||||
 | 
			
		||||
    AREA    |.text|, CODE, READONLY
 | 
			
		||||
 | 
			
		||||
;********************************************************************************
 | 
			
		||||
@@ -344,34 +342,4 @@ loop2_dual
 | 
			
		||||
    ldmia   sp!, {r4 - r11, pc} ; replace vars, return                      restore
 | 
			
		||||
    ENDP
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; sjl added 10/17/08
 | 
			
		||||
;void vp8_dc_only_idct_armv6(short input_dc, short *output, int pitch)
 | 
			
		||||
; Fills a 4x4 block of 16-bit values with a1 = (input_dc + 4) >> 3.
; r0 = input_dc, r1 = output, r2 = pitch.  r2 is added unscaled to the
; output address, so it is a distance in bytes between consecutive rows.
; Each row receives two word stores, i.e. four halfwords all equal to a1.
|vp8_dc_only_idct_armv6| PROC
 | 
			
		||||
    stmdb       sp!, {r4 - r6, lr}
 | 
			
		||||
 | 
			
		||||
    add         r0, r0, #0x4                    ; rounding term before the shift
 | 
			
		||||
    add         r4, r1, r2                      ; output + shortpitch
 | 
			
		||||
    mov         r0, r0, ASR #0x3    ;aka a1
 | 
			
		||||
    add         r5, r1, r2, LSL #1              ; output + shortpitch * 2
 | 
			
		||||
    pkhbt       r0, r0, r0, lsl #16             ; a1 | a1
 | 
			
		||||
    add         r6, r5, r2                      ; output + shortpitch * 3
 | 
			
		||||
 | 
			
		||||
    str         r0, [r1, #0]                    ; row 0, halfwords 0-1
 | 
			
		||||
    str         r0, [r1, #4]                    ; row 0, halfwords 2-3
 | 
			
		||||
 | 
			
		||||
    str         r0, [r4, #0]                    ; row 1
 | 
			
		||||
    str         r0, [r4, #4]
 | 
			
		||||
 | 
			
		||||
    str         r0, [r5, #0]                    ; row 2
 | 
			
		||||
    str         r0, [r5, #4]
 | 
			
		||||
 | 
			
		||||
    str         r0, [r6, #0]                    ; row 3
 | 
			
		||||
    str         r0, [r6, #4]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    ldmia       sp!, {r4 - r6, pc}              ; restore and return
 | 
			
		||||
 | 
			
		||||
    ENDP  ; |vp8_dc_only_idct_armv6|
 | 
			
		||||
 | 
			
		||||
    END
 | 
			
		||||
 
 | 
			
		||||
@@ -8,8 +8,8 @@
 | 
			
		||||
;  be found in the AUTHORS file in the root of the source tree.
 | 
			
		||||
;
 | 
			
		||||
 | 
			
		||||
    EXPORT |vp8_short_inv_walsh4x4_armv6|
 | 
			
		||||
    EXPORT |vp8_short_inv_walsh4x4_1_armv6|
 | 
			
		||||
    EXPORT |vp8_short_inv_walsh4x4_v6|
 | 
			
		||||
    EXPORT |vp8_short_inv_walsh4x4_1_v6|
 | 
			
		||||
 | 
			
		||||
    ARM
 | 
			
		||||
    REQUIRE8
 | 
			
		||||
@@ -17,8 +17,8 @@
 | 
			
		||||
 | 
			
		||||
    AREA    |.text|, CODE, READONLY  ; name this block of code
 | 
			
		||||
 | 
			
		||||
;short vp8_short_inv_walsh4x4_armv6(short *input, short *output)
 | 
			
		||||
|vp8_short_inv_walsh4x4_armv6| PROC
 | 
			
		||||
;short vp8_short_inv_walsh4x4_v6(short *input, short *output)
 | 
			
		||||
|vp8_short_inv_walsh4x4_v6| PROC
 | 
			
		||||
 | 
			
		||||
    stmdb       sp!, {r4 - r11, lr}
 | 
			
		||||
 | 
			
		||||
@@ -123,11 +123,11 @@
 | 
			
		||||
    str         r5, [r1]
 | 
			
		||||
 | 
			
		||||
    ldmia       sp!, {r4 - r11, pc}
 | 
			
		||||
    ENDP        ; |vp8_short_inv_walsh4x4_armv6|
 | 
			
		||||
    ENDP        ; |vp8_short_inv_walsh4x4_v6|
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
;short vp8_short_inv_walsh4x4_1_armv6(short *input, short *output)
 | 
			
		||||
|vp8_short_inv_walsh4x4_1_armv6| PROC
 | 
			
		||||
;short vp8_short_inv_walsh4x4_1_v6(short *input, short *output)
 | 
			
		||||
|vp8_short_inv_walsh4x4_1_v6| PROC
 | 
			
		||||
 | 
			
		||||
    ldrsh       r2, [r0]             ; [0]
 | 
			
		||||
    add         r2, r2, #3           ; [0] + 3
 | 
			
		||||
@@ -145,7 +145,7 @@
 | 
			
		||||
    str         r2, [r1]
 | 
			
		||||
 | 
			
		||||
    bx          lr
 | 
			
		||||
    ENDP        ; |vp8_short_inv_walsh4x4_1_armv6|
 | 
			
		||||
    ENDP        ; |vp8_short_inv_walsh4x4_1_v6|
 | 
			
		||||
 | 
			
		||||
; Constant Pool
 | 
			
		||||
c0x00030003 DCD 0x00030003
 | 
			
		||||
 
 | 
			
		||||
@@ -15,8 +15,9 @@
 | 
			
		||||
#if HAVE_ARMV6
 | 
			
		||||
extern prototype_idct(vp8_short_idct4x4llm_1_v6);
 | 
			
		||||
extern prototype_idct(vp8_short_idct4x4llm_v6_dual);
 | 
			
		||||
extern prototype_second_order(vp8_short_inv_walsh4x4_1_armv6);
 | 
			
		||||
extern prototype_second_order(vp8_short_inv_walsh4x4_armv6);
 | 
			
		||||
extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
 | 
			
		||||
extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
 | 
			
		||||
extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
 | 
			
		||||
 | 
			
		||||
#undef  vp8_idct_idct1
 | 
			
		||||
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6
 | 
			
		||||
@@ -24,16 +25,20 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_armv6);
 | 
			
		||||
#undef  vp8_idct_idct16
 | 
			
		||||
#define vp8_idct_idct16 vp8_short_idct4x4llm_v6_dual
 | 
			
		||||
 | 
			
		||||
#undef  vp8_idct_idct1_scalar_add
 | 
			
		||||
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_v6
 | 
			
		||||
 | 
			
		||||
#undef  vp8_idct_iwalsh1
 | 
			
		||||
#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_armv6
 | 
			
		||||
#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_v6
 | 
			
		||||
 | 
			
		||||
#undef  vp8_idct_iwalsh16
 | 
			
		||||
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_armv6
 | 
			
		||||
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if HAVE_ARMV7
 | 
			
		||||
extern prototype_idct(vp8_short_idct4x4llm_1_neon);
 | 
			
		||||
extern prototype_idct(vp8_short_idct4x4llm_neon);
 | 
			
		||||
extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
 | 
			
		||||
extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
 | 
			
		||||
extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 | 
			
		||||
 | 
			
		||||
@@ -43,6 +48,9 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 | 
			
		||||
#undef  vp8_idct_idct16
 | 
			
		||||
#define vp8_idct_idct16 vp8_short_idct4x4llm_neon
 | 
			
		||||
 | 
			
		||||
#undef  vp8_idct_idct1_scalar_add
 | 
			
		||||
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_neon
 | 
			
		||||
 | 
			
		||||
#undef  vp8_idct_iwalsh1
 | 
			
		||||
#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_neon
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										49
									
								
								vp8/common/arm/neon/dc_only_idct_add_neon.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								vp8/common/arm/neon/dc_only_idct_add_neon.asm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,49 @@
 | 
			
		||||
;
 | 
			
		||||
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 | 
			
		||||
;
 | 
			
		||||
;  Use of this source code is governed by a BSD-style license and patent
 | 
			
		||||
;  grant that can be found in the LICENSE file in the root of the source
 | 
			
		||||
;  tree. All contributing project authors may be found in the AUTHORS
 | 
			
		||||
;  file in the root of the source tree.
 | 
			
		||||
;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    EXPORT  |vp8_dc_only_idct_add_neon|
 | 
			
		||||
    ARM
 | 
			
		||||
    REQUIRE8
 | 
			
		||||
    PRESERVE8
 | 
			
		||||
 | 
			
		||||
    AREA ||.text||, CODE, READONLY, ALIGN=2
 | 
			
		||||
;void vp8_dc_only_idct_add_neon(short input_dc, unsigned char *pred_ptr,
 | 
			
		||||
;                               unsigned char *dst_ptr, int pitch, int stride)
 | 
			
		||||
; r0  input_dc
 | 
			
		||||
; r1  pred_ptr
 | 
			
		||||
; r2  dst_ptr
 | 
			
		||||
; r3  pitch
 | 
			
		||||
; sp  stride
 | 
			
		||||
; NEON version: adds a1 = (input_dc + 4) >> 3 to each pixel of a 4x4 predictor
; block and stores the result, saturated to unsigned 8 bits, through r2.
; Predictor rows are read r3 (pitch) bytes apart; output rows are written
; `stride` bytes apart, where stride is the first stack argument.
; Leaf routine: nothing is pushed, only r0-r3, r12 and q0-q2 are written.
|vp8_dc_only_idct_add_neon| PROC
 | 
			
		||||
    add             r0, r0, #4              ; rounding term before the shift
 | 
			
		||||
    asr             r0, r0, #3              ; a1 = (input_dc + 4) >> 3
 | 
			
		||||
    ldr             r12, [sp]               ; stride (nothing pushed, so it is at sp + 0)
 | 
			
		||||
    vdup.16         q0, r0                  ; a1 in all eight 16-bit lanes
 | 
			
		||||
 | 
			
		||||
    vld1.32         {d2[0]}, [r1], r3       ; predictor row 0 (4 bytes), r1 += pitch
 | 
			
		||||
    vld1.32         {d2[1]}, [r1], r3       ; predictor row 1
 | 
			
		||||
    vld1.32         {d4[0]}, [r1], r3       ; predictor row 2
 | 
			
		||||
    vld1.32         {d4[1]}, [r1]           ; predictor row 3
 | 
			
		||||
 | 
			
		||||
    vaddw.u8        q1, q0, d2              ; rows 0-1: zero-extend bytes to 16 bits, add a1
 | 
			
		||||
    vaddw.u8        q2, q0, d4              ; rows 2-3
 | 
			
		||||
 | 
			
		||||
    vqmovun.s16     d2, q1                  ; narrow signed 16-bit sums to unsigned 8-bit with saturation
 | 
			
		||||
    vqmovun.s16     d4, q2
 | 
			
		||||
 | 
			
		||||
    vst1.32         {d2[0]}, [r2], r12      ; output row 0, r2 += stride
 | 
			
		||||
    vst1.32         {d2[1]}, [r2], r12      ; output row 1
 | 
			
		||||
    vst1.32         {d4[0]}, [r2], r12      ; output row 2
 | 
			
		||||
    vst1.32         {d4[1]}, [r2]           ; output row 3
 | 
			
		||||
 | 
			
		||||
    bx             lr
 | 
			
		||||
 | 
			
		||||
    ENDP
 | 
			
		||||
    END
 | 
			
		||||
							
								
								
									
										218
									
								
								vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										218
									
								
								vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,218 @@
 | 
			
		||||
;
 | 
			
		||||
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 | 
			
		||||
;
 | 
			
		||||
;  Use of this source code is governed by a BSD-style license and patent
 | 
			
		||||
;  grant that can be found in the LICENSE file in the root of the source
 | 
			
		||||
;  tree. All contributing project authors may be found in the AUTHORS
 | 
			
		||||
;  file in the root of the source tree.
 | 
			
		||||
;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    EXPORT |vp8_dequant_dc_idct_add_v6|
 | 
			
		||||
 | 
			
		||||
    AREA |.text|, CODE, READONLY
 | 
			
		||||
 | 
			
		||||
;void vp8_dequant_dc_idct_add_v6(short *input, short *dq, unsigned char *pred,
 | 
			
		||||
; unsigned char *dest, int pitch, int stride, int Dc)
 | 
			
		||||
; r0 = input
 | 
			
		||||
; r1 = dq
 | 
			
		||||
; r2 = pred
 | 
			
		||||
; r3 = dest
 | 
			
		||||
; sp + 36 = pitch  ; +4 = 40
 | 
			
		||||
; sp + 40 = stride  ; +4 = 44
 | 
			
		||||
; sp + 44 = Dc  ; +4 = 48
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; Stages, all operating on the 16 halfword coefficients at r0 (input):
;   1. Dequantize in place: input[0] is overwritten with the Dc argument,
;      input[i] = input[i] * dq[i] for i = 1..15 (low 16 bits of each product).
;   2. First transform pass, in place, two columns per iteration (packed
;      halfword pairs), using the cospi8sqrt2minus1 / sinpi8sqrt2 constants.
;   3. Second transform pass, two output rows per iteration: add rounding 4,
;      shift right by 3, add the predictor bytes read from r2 (rows `pitch`
;      bytes apart), clamp to 0..255 and store to dest (rows `stride` apart).
;   4. Clear the 16 input coefficients to zero.
; Stack layout after the 9-register push: sp+36 = pitch, sp+40 = stride,
; sp+44 = Dc.  A 4-byte slot is then reserved to hold the running dest pointer,
; which moves pitch to sp+40 and stride to sp+44 until the slot is released.
|vp8_dequant_dc_idct_add_v6| PROC
 | 
			
		||||
    stmdb   sp!, {r4-r11, lr}
 | 
			
		||||
 | 
			
		||||
    ldr     r6, [sp, #44]           ; Dc (read before the dest slot is reserved)
 | 
			
		||||
 | 
			
		||||
    ldr     r4, [r0]                ;input
 | 
			
		||||
    ldr     r5, [r1], #4            ;dq
 | 
			
		||||
 | 
			
		||||
    sub     sp, sp, #4              ; reserve a stack slot ...
 | 
			
		||||
    str     r3, [sp]                ; ... holding the running dest pointer
 | 
			
		||||
 | 
			
		||||
    smultt  r7, r4, r5              ; input[1] * dq[1]; input[0] is not multiplied
 | 
			
		||||
 | 
			
		||||
    ldr     r4, [r0, #4]            ;input
 | 
			
		||||
    ldr     r5, [r1], #4            ;dq
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2            ; input[0] = Dc
 | 
			
		||||
    strh    r7, [r0], #2            ; input[1] = dequantized value
 | 
			
		||||
 | 
			
		||||
    smulbb  r6, r4, r5              ; input[2] * dq[2]
 | 
			
		||||
    smultt  r7, r4, r5              ; input[3] * dq[3]
 | 
			
		||||
 | 
			
		||||
    ldr     r4, [r0, #4]            ;input
 | 
			
		||||
    ldr     r5, [r1], #4            ;dq
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2
 | 
			
		||||
    strh    r7, [r0], #2
 | 
			
		||||
 | 
			
		||||
    mov     r12, #3                 ; 3 iterations x 4 coefficients = remaining 12
 | 
			
		||||
 | 
			
		||||
vp8_dequant_dc_add_loop
 | 
			
		||||
    smulbb  r6, r4, r5
 | 
			
		||||
    smultt  r7, r4, r5
 | 
			
		||||
 | 
			
		||||
    ldr     r4, [r0, #4]            ;input
 | 
			
		||||
    ldr     r5, [r1], #4            ;dq
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2
 | 
			
		||||
    strh    r7, [r0], #2
 | 
			
		||||
 | 
			
		||||
    smulbb  r6, r4, r5
 | 
			
		||||
    smultt  r7, r4, r5
 | 
			
		||||
 | 
			
		||||
    subs    r12, r12, #1
 | 
			
		||||
 | 
			
		||||
    ldrne   r4, [r0, #4]            ; prefetch next pair only if another iteration follows
 | 
			
		||||
    ldrne   r5, [r1], #4
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2            ; strh leaves the flags from subs intact for the bne
 | 
			
		||||
    strh    r7, [r0], #2
 | 
			
		||||
 | 
			
		||||
    bne     vp8_dequant_dc_add_loop
 | 
			
		||||
 | 
			
		||||
    sub     r0, r0, #32             ; rewind to the first coefficient (16 halfwords)
 | 
			
		||||
    mov     r1, r0                  ; first pass writes back in place
 | 
			
		||||
 | 
			
		||||
; short_idct4x4llm_v6_dual
 | 
			
		||||
    ldr     r3, cospi8sqrt2minus1
 | 
			
		||||
    ldr     r4, sinpi8sqrt2
 | 
			
		||||
    ldr     r6, [r0, #8]            ; halfwords 4,5 (byte offset 8)
 | 
			
		||||
    mov     r5, #2                  ; two iterations, one packed halfword pair per row each
 | 
			
		||||
vp8_dequant_dc_idct_loop1_v6
 | 
			
		||||
    ldr     r12, [r0, #24]          ; halfwords 12,13
 | 
			
		||||
    ldr     r14, [r0, #16]          ; halfwords 8,9
 | 
			
		||||
    smulwt  r9, r3, r6              ; smulw{t,b}: (32-bit constant * selected halfword) >> 16
 | 
			
		||||
    smulwb  r7, r3, r6
 | 
			
		||||
    smulwt  r10, r4, r6
 | 
			
		||||
    smulwb  r8, r4, r6
 | 
			
		||||
    pkhbt   r7, r7, r9, lsl #16     ; repack the two 16-bit products into one word
 | 
			
		||||
    smulwt  r11, r3, r12
 | 
			
		||||
    pkhbt   r8, r8, r10, lsl #16
 | 
			
		||||
    uadd16  r6, r6, r7
 | 
			
		||||
    smulwt  r7, r4, r12
 | 
			
		||||
    smulwb  r9, r3, r12
 | 
			
		||||
    smulwb  r10, r4, r12
 | 
			
		||||
    subs    r5, r5, #1              ; flags consumed by ldrne / bne below
 | 
			
		||||
    pkhbt   r9, r9, r11, lsl #16
 | 
			
		||||
    ldr     r11, [r0], #4           ; halfwords 0,1; advance to the next column pair
 | 
			
		||||
    pkhbt   r10, r10, r7, lsl #16
 | 
			
		||||
    uadd16  r7, r12, r9
 | 
			
		||||
    usub16  r7, r8, r7
 | 
			
		||||
    uadd16  r6, r6, r10
 | 
			
		||||
    uadd16  r10, r11, r14
 | 
			
		||||
    usub16  r8, r11, r14
 | 
			
		||||
    uadd16  r9, r10, r6
 | 
			
		||||
    usub16  r10, r10, r6
 | 
			
		||||
    uadd16  r6, r8, r7
 | 
			
		||||
    usub16  r7, r8, r7
 | 
			
		||||
    str     r6, [r1, #8]
 | 
			
		||||
    ldrne   r6, [r0, #8]            ; preload for the next iteration only
 | 
			
		||||
    str     r7, [r1, #16]
 | 
			
		||||
    str     r10, [r1, #24]
 | 
			
		||||
    str     r9, [r1], #4
 | 
			
		||||
    bne     vp8_dequant_dc_idct_loop1_v6
 | 
			
		||||
 | 
			
		||||
    mov     r5, #2                  ; two iterations, two output rows each
 | 
			
		||||
    sub     r0, r1, #8              ; r1 advanced 8 bytes in pass 1; back to block start
 | 
			
		||||
vp8_dequant_dc_idct_loop2_v6
 | 
			
		||||
    ldr     r6, [r0], #4            ; four consecutive words = 8 intermediate halfwords
 | 
			
		||||
    ldr     r7, [r0], #4
 | 
			
		||||
    ldr     r8, [r0], #4
 | 
			
		||||
    ldr     r9, [r0], #4
 | 
			
		||||
    smulwt  r1, r3, r6
 | 
			
		||||
    smulwt  r12, r4, r6
 | 
			
		||||
    smulwt  lr, r3, r8
 | 
			
		||||
    smulwt  r10, r4, r8
 | 
			
		||||
    pkhbt   r11, r8, r6, lsl #16
 | 
			
		||||
    pkhbt   r1, lr, r1, lsl #16
 | 
			
		||||
    pkhbt   r12, r10, r12, lsl #16
 | 
			
		||||
    pkhtb   r6, r6, r8, asr #16
 | 
			
		||||
    uadd16  r6, r1, r6
 | 
			
		||||
    pkhbt   lr, r9, r7, lsl #16
 | 
			
		||||
    uadd16  r10, r11, lr
 | 
			
		||||
    usub16  lr, r11, lr
 | 
			
		||||
    pkhtb   r8, r7, r9, asr #16
 | 
			
		||||
    subs    r5, r5, #1              ; flags must survive until the bne at the loop end
 | 
			
		||||
    smulwt  r1, r3, r8
 | 
			
		||||
    smulwb  r7, r3, r8
 | 
			
		||||
    smulwt  r11, r4, r8
 | 
			
		||||
    smulwb  r9, r4, r8
 | 
			
		||||
    pkhbt   r1, r7, r1, lsl #16
 | 
			
		||||
    uadd16  r8, r1, r8
 | 
			
		||||
    pkhbt   r11, r9, r11, lsl #16
 | 
			
		||||
    usub16  r1, r12, r8
 | 
			
		||||
    uadd16  r8, r11, r6
 | 
			
		||||
    ldr     r9, c0x00040004         ; rounding term 4 in both halfwords
 | 
			
		||||
    ldr     r12, [sp, #40]          ; pitch (sp+36 shifted by the 4-byte dest slot)
 | 
			
		||||
    uadd16  r6, r10, r8
 | 
			
		||||
    usub16  r7, r10, r8
 | 
			
		||||
    uadd16  r7, r7, r9              ; + 4 before the >> 3 below
 | 
			
		||||
    uadd16  r6, r6, r9
 | 
			
		||||
    uadd16  r10, r14, r1
 | 
			
		||||
    usub16  r1, r14, r1
 | 
			
		||||
    uadd16  r10, r10, r9
 | 
			
		||||
    uadd16  r1, r1, r9
 | 
			
		||||
    ldr     r11, [r2], r12          ; 4 predictor bytes, then pred += pitch
 | 
			
		||||
    mov     r8, r7, asr #3          ; top halfwords of r7/r10/r1/r6, each >> 3, ...
 | 
			
		||||
    pkhtb   r9, r8, r10, asr #19    ; ... packed two per word
 | 
			
		||||
    mov     r8, r1, asr #3
 | 
			
		||||
    pkhtb   r8, r8, r6, asr #19
 | 
			
		||||
    uxtb16  lr, r11, ror #8         ; predictor bytes 1 and 3 as halfwords
 | 
			
		||||
    qadd16  r9, r9, lr
 | 
			
		||||
    uxtb16  lr, r11                 ; predictor bytes 0 and 2 as halfwords
 | 
			
		||||
    qadd16  r8, r8, lr
 | 
			
		||||
    usat16  r9, #8, r9              ; clamp to 0..255
 | 
			
		||||
    usat16  r8, #8, r8
 | 
			
		||||
    orr     r9, r8, r9, lsl #8      ; interleave back into 4 packed output bytes
 | 
			
		||||
    ldr     r11, [r2], r12          ; next 4 predictor bytes, then pred += pitch
 | 
			
		||||
    ldr     lr, [sp]                ; running dest pointer from the stack slot
 | 
			
		||||
    ldr     r12, [sp, #44]          ; stride (sp+40 shifted by the 4-byte dest slot)
 | 
			
		||||
    mov     r7, r7, lsl #16         ; move the bottom halfwords to the top ...
 | 
			
		||||
    mov     r1, r1, lsl #16
 | 
			
		||||
    mov     r10, r10, lsl #16
 | 
			
		||||
    mov     r6, r6, lsl #16
 | 
			
		||||
    mov     r7, r7, asr #3          ; ... and repeat the >> 3 / pack sequence on them
 | 
			
		||||
    pkhtb   r7, r7, r10, asr #19
 | 
			
		||||
    mov     r1, r1, asr #3
 | 
			
		||||
    pkhtb   r1, r1, r6, asr #19
 | 
			
		||||
    uxtb16  r8, r11, ror #8
 | 
			
		||||
    qadd16  r7, r7, r8
 | 
			
		||||
    uxtb16  r8, r11
 | 
			
		||||
    qadd16  r1, r1, r8
 | 
			
		||||
    usat16  r7, #8, r7
 | 
			
		||||
    usat16  r1, #8, r1
 | 
			
		||||
    orr     r1, r1, r7, lsl #8
 | 
			
		||||
    str     r9, [lr], r12           ; store 4 output bytes, then dest += stride
 | 
			
		||||
    str     r1, [lr], r12           ; store 4 output bytes, then dest += stride
 | 
			
		||||
    str     lr, [sp]                ; save the advanced dest pointer for the next iteration
 | 
			
		||||
    bne     vp8_dequant_dc_idct_loop2_v6
 | 
			
		||||
 | 
			
		||||
; vpx_memset
 | 
			
		||||
    sub     r0, r0, #32             ; pass 2 advanced r0 by 32 bytes; back to block start
 | 
			
		||||
    add     sp, sp, #4              ; release the dest-pointer slot
 | 
			
		||||
 | 
			
		||||
    mov     r12, #0
 | 
			
		||||
    str     r12, [r0]               ; zero all 32 bytes (16 coefficients) of input
 | 
			
		||||
    str     r12, [r0, #4]
 | 
			
		||||
    str     r12, [r0, #8]
 | 
			
		||||
    str     r12, [r0, #12]
 | 
			
		||||
    str     r12, [r0, #16]
 | 
			
		||||
    str     r12, [r0, #20]
 | 
			
		||||
    str     r12, [r0, #24]
 | 
			
		||||
    str     r12, [r0, #28]
 | 
			
		||||
 | 
			
		||||
    ldmia   sp!, {r4 - r11, pc}
 | 
			
		||||
    ENDP    ; |vp8_dequant_dc_idct_add_v6|
 | 
			
		||||
 | 
			
		||||
; Constant Pool
 | 
			
		||||
cospi8sqrt2minus1 DCD 0x00004E7B
 | 
			
		||||
sinpi8sqrt2       DCD 0x00008A8C
 | 
			
		||||
c0x00040004       DCD 0x00040004
 | 
			
		||||
 | 
			
		||||
    END
 | 
			
		||||
							
								
								
									
										196
									
								
								vp8/decoder/arm/armv6/dequant_idct_v6.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										196
									
								
								vp8/decoder/arm/armv6/dequant_idct_v6.asm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,196 @@
 | 
			
		||||
;
 | 
			
		||||
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 | 
			
		||||
;
 | 
			
		||||
;  Use of this source code is governed by a BSD-style license and patent
 | 
			
		||||
;  grant that can be found in the LICENSE file in the root of the source
 | 
			
		||||
;  tree. All contributing project authors may be found in the AUTHORS
 | 
			
		||||
;  file in the root of the source tree.
 | 
			
		||||
;
 | 
			
		||||
 | 
			
		||||
    EXPORT |vp8_dequant_idct_add_v6|
 | 
			
		||||
 | 
			
		||||
    AREA |.text|, CODE, READONLY
 | 
			
		||||
;void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *pred,
 | 
			
		||||
; unsigned char *dest, int pitch, int stride)
 | 
			
		||||
; r0 = input
 | 
			
		||||
; r1 = dq
 | 
			
		||||
; r2 = pred
 | 
			
		||||
; r3 = dest
 | 
			
		||||
; sp + 36 = pitch  ; +4 = 40
 | 
			
		||||
; sp + 40 = stride  ; +4 = 44
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; Stages, all operating on the 16 halfword coefficients at r0 (input):
;   1. Dequantize in place: input[i] = input[i] * dq[i] for i = 0..15
;      (low 16 bits of each product).
;   2. First transform pass, in place, two columns per iteration (packed
;      halfword pairs), using the cospi8sqrt2minus1 / sinpi8sqrt2 constants.
;   3. Second transform pass, two output rows per iteration: add rounding 4,
;      shift right by 3, add the predictor bytes read from r2 (rows `pitch`
;      bytes apart), clamp to 0..255 and store to dest (rows `stride` apart).
;   4. Clear the 16 input coefficients to zero.
; Stack layout after the 9-register push: sp+36 = pitch, sp+40 = stride.
; A 4-byte slot is then reserved to hold the running dest pointer, which
; moves pitch to sp+40 and stride to sp+44 until the slot is released.
|vp8_dequant_idct_add_v6| PROC
 | 
			
		||||
    stmdb   sp!, {r4-r11, lr}
 | 
			
		||||
 | 
			
		||||
    ldr     r4, [r0]                ;input
 | 
			
		||||
    ldr     r5, [r1], #4            ;dq
 | 
			
		||||
 | 
			
		||||
    sub     sp, sp, #4              ; reserve a stack slot ...
 | 
			
		||||
    str     r3, [sp]                ; ... holding the running dest pointer
 | 
			
		||||
 | 
			
		||||
    mov     r12, #4                 ; 4 iterations x 4 coefficients = 16
 | 
			
		||||
 | 
			
		||||
vp8_dequant_add_loop
 | 
			
		||||
    smulbb  r6, r4, r5              ; low halfword of input pair * low halfword of dq pair
 | 
			
		||||
    smultt  r7, r4, r5              ; high halfword * high halfword
 | 
			
		||||
 | 
			
		||||
    ldr     r4, [r0, #4]            ;input
 | 
			
		||||
    ldr     r5, [r1], #4            ;dq
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2            ; write the products back over the coefficients
 | 
			
		||||
    strh    r7, [r0], #2
 | 
			
		||||
 | 
			
		||||
    smulbb  r6, r4, r5
 | 
			
		||||
    smultt  r7, r4, r5
 | 
			
		||||
 | 
			
		||||
    subs    r12, r12, #1
 | 
			
		||||
 | 
			
		||||
    ldrne   r4, [r0, #4]            ; prefetch next pair only if another iteration follows
 | 
			
		||||
    ldrne   r5, [r1], #4
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2            ; strh leaves the flags from subs intact for the bne
 | 
			
		||||
    strh    r7, [r0], #2
 | 
			
		||||
 | 
			
		||||
    bne     vp8_dequant_add_loop
 | 
			
		||||
 | 
			
		||||
    sub     r0, r0, #32             ; rewind to the first coefficient (16 halfwords)
 | 
			
		||||
    mov     r1, r0                  ; first pass writes back in place
 | 
			
		||||
 | 
			
		||||
; short_idct4x4llm_v6_dual
 | 
			
		||||
    ldr     r3, cospi8sqrt2minus1
 | 
			
		||||
    ldr     r4, sinpi8sqrt2
 | 
			
		||||
    ldr     r6, [r0, #8]            ; halfwords 4,5 (byte offset 8)
 | 
			
		||||
    mov     r5, #2                  ; two iterations, one packed halfword pair per row each
 | 
			
		||||
vp8_dequant_idct_loop1_v6
 | 
			
		||||
    ldr     r12, [r0, #24]          ; halfwords 12,13
 | 
			
		||||
    ldr     r14, [r0, #16]          ; halfwords 8,9
 | 
			
		||||
    smulwt  r9, r3, r6              ; smulw{t,b}: (32-bit constant * selected halfword) >> 16
 | 
			
		||||
    smulwb  r7, r3, r6
 | 
			
		||||
    smulwt  r10, r4, r6
 | 
			
		||||
    smulwb  r8, r4, r6
 | 
			
		||||
    pkhbt   r7, r7, r9, lsl #16     ; repack the two 16-bit products into one word
 | 
			
		||||
    smulwt  r11, r3, r12
 | 
			
		||||
    pkhbt   r8, r8, r10, lsl #16
 | 
			
		||||
    uadd16  r6, r6, r7
 | 
			
		||||
    smulwt  r7, r4, r12
 | 
			
		||||
    smulwb  r9, r3, r12
 | 
			
		||||
    smulwb  r10, r4, r12
 | 
			
		||||
    subs    r5, r5, #1              ; flags consumed by ldrne / bne below
 | 
			
		||||
    pkhbt   r9, r9, r11, lsl #16
 | 
			
		||||
    ldr     r11, [r0], #4           ; halfwords 0,1; advance to the next column pair
 | 
			
		||||
    pkhbt   r10, r10, r7, lsl #16
 | 
			
		||||
    uadd16  r7, r12, r9
 | 
			
		||||
    usub16  r7, r8, r7
 | 
			
		||||
    uadd16  r6, r6, r10
 | 
			
		||||
    uadd16  r10, r11, r14
 | 
			
		||||
    usub16  r8, r11, r14
 | 
			
		||||
    uadd16  r9, r10, r6
 | 
			
		||||
    usub16  r10, r10, r6
 | 
			
		||||
    uadd16  r6, r8, r7
 | 
			
		||||
    usub16  r7, r8, r7
 | 
			
		||||
    str     r6, [r1, #8]
 | 
			
		||||
    ldrne   r6, [r0, #8]            ; preload for the next iteration only
 | 
			
		||||
    str     r7, [r1, #16]
 | 
			
		||||
    str     r10, [r1, #24]
 | 
			
		||||
    str     r9, [r1], #4
 | 
			
		||||
    bne     vp8_dequant_idct_loop1_v6
 | 
			
		||||
 | 
			
		||||
    mov     r5, #2                  ; two iterations, two output rows each
 | 
			
		||||
    sub     r0, r1, #8              ; r1 advanced 8 bytes in pass 1; back to block start
 | 
			
		||||
vp8_dequant_idct_loop2_v6
 | 
			
		||||
    ldr     r6, [r0], #4            ; four consecutive words = 8 intermediate halfwords
 | 
			
		||||
    ldr     r7, [r0], #4
 | 
			
		||||
    ldr     r8, [r0], #4
 | 
			
		||||
    ldr     r9, [r0], #4
 | 
			
		||||
    smulwt  r1, r3, r6
 | 
			
		||||
    smulwt  r12, r4, r6
 | 
			
		||||
    smulwt  lr, r3, r8
 | 
			
		||||
    smulwt  r10, r4, r8
 | 
			
		||||
    pkhbt   r11, r8, r6, lsl #16
 | 
			
		||||
    pkhbt   r1, lr, r1, lsl #16
 | 
			
		||||
    pkhbt   r12, r10, r12, lsl #16
 | 
			
		||||
    pkhtb   r6, r6, r8, asr #16
 | 
			
		||||
    uadd16  r6, r1, r6
 | 
			
		||||
    pkhbt   lr, r9, r7, lsl #16
 | 
			
		||||
    uadd16  r10, r11, lr
 | 
			
		||||
    usub16  lr, r11, lr
 | 
			
		||||
    pkhtb   r8, r7, r9, asr #16
 | 
			
		||||
    subs    r5, r5, #1              ; flags must survive until the bne at the loop end
 | 
			
		||||
    smulwt  r1, r3, r8
 | 
			
		||||
    smulwb  r7, r3, r8
 | 
			
		||||
    smulwt  r11, r4, r8
 | 
			
		||||
    smulwb  r9, r4, r8
 | 
			
		||||
    pkhbt   r1, r7, r1, lsl #16
 | 
			
		||||
    uadd16  r8, r1, r8
 | 
			
		||||
    pkhbt   r11, r9, r11, lsl #16
 | 
			
		||||
    usub16  r1, r12, r8
 | 
			
		||||
    uadd16  r8, r11, r6
 | 
			
		||||
    ldr     r9, c0x00040004         ; rounding term 4 in both halfwords
 | 
			
		||||
    ldr     r12, [sp, #40]          ; pitch (sp+36 shifted by the 4-byte dest slot)
 | 
			
		||||
    uadd16  r6, r10, r8
 | 
			
		||||
    usub16  r7, r10, r8
 | 
			
		||||
    uadd16  r7, r7, r9              ; + 4 before the >> 3 below
 | 
			
		||||
    uadd16  r6, r6, r9
 | 
			
		||||
    uadd16  r10, r14, r1
 | 
			
		||||
    usub16  r1, r14, r1
 | 
			
		||||
    uadd16  r10, r10, r9
 | 
			
		||||
    uadd16  r1, r1, r9
 | 
			
		||||
    ldr     r11, [r2], r12          ; 4 predictor bytes, then pred += pitch
 | 
			
		||||
    mov     r8, r7, asr #3          ; top halfwords of r7/r10/r1/r6, each >> 3, ...
 | 
			
		||||
    pkhtb   r9, r8, r10, asr #19    ; ... packed two per word
 | 
			
		||||
    mov     r8, r1, asr #3
 | 
			
		||||
    pkhtb   r8, r8, r6, asr #19
 | 
			
		||||
    uxtb16  lr, r11, ror #8         ; predictor bytes 1 and 3 as halfwords
 | 
			
		||||
    qadd16  r9, r9, lr
 | 
			
		||||
    uxtb16  lr, r11                 ; predictor bytes 0 and 2 as halfwords
 | 
			
		||||
    qadd16  r8, r8, lr
 | 
			
		||||
    usat16  r9, #8, r9              ; clamp to 0..255
 | 
			
		||||
    usat16  r8, #8, r8
 | 
			
		||||
    orr     r9, r8, r9, lsl #8      ; interleave back into 4 packed output bytes
 | 
			
		||||
    ldr     r11, [r2], r12          ; next 4 predictor bytes, then pred += pitch
 | 
			
		||||
    ldr     lr, [sp]                ; running dest pointer from the stack slot
 | 
			
		||||
    ldr     r12, [sp, #44]          ; stride (sp+40 shifted by the 4-byte dest slot)
 | 
			
		||||
    mov     r7, r7, lsl #16         ; move the bottom halfwords to the top ...
 | 
			
		||||
    mov     r1, r1, lsl #16
 | 
			
		||||
    mov     r10, r10, lsl #16
 | 
			
		||||
    mov     r6, r6, lsl #16
 | 
			
		||||
    mov     r7, r7, asr #3          ; ... and repeat the >> 3 / pack sequence on them
 | 
			
		||||
    pkhtb   r7, r7, r10, asr #19
 | 
			
		||||
    mov     r1, r1, asr #3
 | 
			
		||||
    pkhtb   r1, r1, r6, asr #19
 | 
			
		||||
    uxtb16  r8, r11, ror #8
 | 
			
		||||
    qadd16  r7, r7, r8
 | 
			
		||||
    uxtb16  r8, r11
 | 
			
		||||
    qadd16  r1, r1, r8
 | 
			
		||||
    usat16  r7, #8, r7
 | 
			
		||||
    usat16  r1, #8, r1
 | 
			
		||||
    orr     r1, r1, r7, lsl #8
 | 
			
		||||
    str     r9, [lr], r12           ; store 4 output bytes, then dest += stride
 | 
			
		||||
    str     r1, [lr], r12           ; store 4 output bytes, then dest += stride
 | 
			
		||||
    str     lr, [sp]                ; save the advanced dest pointer for the next iteration
 | 
			
		||||
    bne     vp8_dequant_idct_loop2_v6
 | 
			
		||||
 | 
			
		||||
; vpx_memset
 | 
			
		||||
    sub     r0, r0, #32             ; pass 2 advanced r0 by 32 bytes; back to block start
 | 
			
		||||
    add     sp, sp, #4              ; release the dest-pointer slot
 | 
			
		||||
 | 
			
		||||
    mov     r12, #0
 | 
			
		||||
    str     r12, [r0]               ; zero all 32 bytes (16 coefficients) of input
 | 
			
		||||
    str     r12, [r0, #4]
 | 
			
		||||
    str     r12, [r0, #8]
 | 
			
		||||
    str     r12, [r0, #12]
 | 
			
		||||
    str     r12, [r0, #16]
 | 
			
		||||
    str     r12, [r0, #20]
 | 
			
		||||
    str     r12, [r0, #24]
 | 
			
		||||
    str     r12, [r0, #28]
 | 
			
		||||
 | 
			
		||||
    ldmia   sp!, {r4 - r11, pc}
 | 
			
		||||
    ENDP    ; |vp8_dequant_idct_add_v6|
 | 
			
		||||
 | 
			
		||||
; Constant Pool
 | 
			
		||||
cospi8sqrt2minus1 DCD 0x00004E7B
 | 
			
		||||
sinpi8sqrt2       DCD 0x00008A8C
 | 
			
		||||
c0x00040004       DCD 0x00040004
 | 
			
		||||
 | 
			
		||||
    END
 | 
			
		||||
@@ -1,203 +0,0 @@
 | 
			
		||||
;
 | 
			
		||||
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 | 
			
		||||
;
 | 
			
		||||
;  Use of this source code is governed by a BSD-style license
 | 
			
		||||
;  that can be found in the LICENSE file in the root of the source
 | 
			
		||||
;  tree. An additional intellectual property rights grant can be found
 | 
			
		||||
;  in the file PATENTS.  All contributing project authors may
 | 
			
		||||
;  be found in the AUTHORS file in the root of the source tree.
 | 
			
		||||
;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    EXPORT  |vp8_dequant_dc_idct_v6|
 | 
			
		||||
    ; ARM
 | 
			
		||||
    ; REQUIRE8
 | 
			
		||||
    ; PRESERVE8
 | 
			
		||||
 | 
			
		||||
    AREA    |.text|, CODE, READONLY  ; name this block of code
 | 
			
		||||
; vp8_dequant_dc_idct_v6
;   1. Dequantizes a 4x4 block in place: input[i] = input[i] * dq[i] for
;      i = 1..15 (low 16 bits of each product are kept), while input[0] is
;      replaced by Dc.
;   2. Runs a two-pass 4x4 inverse DCT: the first pass reads the dequantized
;      coefficients and writes 16-bit intermediates to output, the second pass
;      works in place on output and applies the final (x + 4) >> 3.
;   3. Clears the 32-byte input block before returning.
;
;   r0      short *input    16 coefficients; overwritten, then zeroed
;   r1      short *dq       16 dequantization factors
;   r2      short *output   receives 4 rows of 4 shorts
;   r3      int pitch       used as the byte offset between output rows
;   [sp]    int Dc          fifth argument, read from the caller's stack
;
;   r4-r11 and lr are saved on entry and restored on exit; r12 is scratch.
;void vp8_dequant_dc_idct_v6(short *input, short *dq, short *output, int pitch,int Dc)
 | 
			
		||||
|vp8_dequant_dc_idct_v6| PROC
 | 
			
		||||
    stmdb   sp!, {r4-r11, lr}
 | 
			
		||||
 | 
			
		||||
    ldr     r6, [sp, #36]           ; r6 = Dc (9 registers pushed = 36 bytes)
 | 
			
		||||
 | 
			
		||||
    ldr     r4, [r0]                ; r4 = input[1] | input[0]
 | 
			
		||||
    ldr     r5, [r1], #4            ; r5 = dq[1] | dq[0], dq += 2
 | 
			
		||||
 | 
			
		||||
; Keep the original input pointer on the stack; it is reloaded for the
; final clear of the input block.
    sub     sp, sp, #4
 | 
			
		||||
    str     r0, [sp]
 | 
			
		||||
 | 
			
		||||
; Only the top halves are multiplied here: coefficient 0 comes from Dc (r6).
    smultt  r7, r4, r5
 | 
			
		||||
 | 
			
		||||
; The next input/dq pair is loaded before the current pair is stored.
    ldr     r4, [r0, #4]            ; next two input coefficients
 | 
			
		||||
    ldr     r5, [r1], #4            ; next two dq factors
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2            ; input[0] = Dc
 | 
			
		||||
    strh    r7, [r0], #2            ; input[1] = input[1] * dq[1]
 | 
			
		||||
 | 
			
		||||
    smulbb  r6, r4, r5
 | 
			
		||||
    smultt  r7, r4, r5
 | 
			
		||||
 | 
			
		||||
    ldr     r4, [r0, #4]            ; next two input coefficients
 | 
			
		||||
    ldr     r5, [r1], #4            ; next two dq factors
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2
 | 
			
		||||
    strh    r7, [r0], #2
 | 
			
		||||
 | 
			
		||||
; Coefficients 0..3 are done; 3 iterations of 4 coefficients remain.
    mov     r12, #3
 | 
			
		||||
 | 
			
		||||
dequant_dc_idct_loop
 | 
			
		||||
    smulbb  r6, r4, r5
 | 
			
		||||
    smultt  r7, r4, r5
 | 
			
		||||
 | 
			
		||||
    ldr     r4, [r0, #4]            ; next two input coefficients
 | 
			
		||||
    ldr     r5, [r1], #4            ; next two dq factors
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2
 | 
			
		||||
    strh    r7, [r0], #2
 | 
			
		||||
 | 
			
		||||
    smulbb  r6, r4, r5
 | 
			
		||||
    smultt  r7, r4, r5
 | 
			
		||||
 | 
			
		||||
    subs    r12, r12, #1
 | 
			
		||||
 | 
			
		||||
; Conditional loads: on the last iteration there is no further pair to fetch.
    ldrne   r4, [r0, #4]
 | 
			
		||||
    ldrne   r5, [r1], #4
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2
 | 
			
		||||
    strh    r7, [r0], #2
 | 
			
		||||
 | 
			
		||||
    bne     dequant_dc_idct_loop
 | 
			
		||||
 | 
			
		||||
; Rewind r0 to the start of the block and move output/pitch into r1/r2.
    sub     r0, r0, #32
 | 
			
		||||
    mov     r1, r2
 | 
			
		||||
    mov     r2, r3
 | 
			
		||||
 | 
			
		||||
; short_idct4x4llm_v6_dual
; Inverse DCT working on two packed 16-bit values per register.
 | 
			
		||||
 | 
			
		||||
    mov r3, #0x00004E00 ; r3 = 0x4E7B, built in two steps
 | 
			
		||||
    orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
 | 
			
		||||
    mov r4, #0x00008A00 ; r4 = 0x8A8C, built in two steps
 | 
			
		||||
    orr r4, r4, #0x0000008C ; sinpi8sqrt2
 | 
			
		||||
    mov r5, #0x2    ; i = 2 iterations, two columns each
 | 
			
		||||
; First pass: each iteration reads one packed pair from each of the four
; input rows and writes one packed pair to each of the four output rows.
loop1_dual_11
 | 
			
		||||
    ldr r6, [r0, #(4*2)]    ; i5 | i4
 | 
			
		||||
    ldr r12, [r0, #(12*2)]  ; i13 | i12
 | 
			
		||||
    ldr r14, [r0, #(8*2)]   ; i9 | i8
 | 
			
		||||
 | 
			
		||||
    smulwt  r9, r3, r6  ; 5c = (ip[5] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwb  r7, r3, r6  ; 4c = (ip[4] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwt  r10, r4, r6 ; 5s = (ip[5] * sinpi8sqrt2) >> 16
 | 
			
		||||
    smulwb  r8, r4, r6  ; 4s = (ip[4] * sinpi8sqrt2) >> 16
 | 
			
		||||
    pkhbt   r7, r7, r9, lsl #16 ; 5c | 4c
 | 
			
		||||
    smulwt  r11, r3, r12    ; 13c = (ip[13] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    pkhbt   r8, r8, r10, lsl #16    ; 5s | 4s
 | 
			
		||||
    uadd16  r6, r6, r7  ; 5c+5 | 4c+4
 | 
			
		||||
    smulwt  r7, r4, r12 ; 13s = (ip[13] * sinpi8sqrt2) >> 16
 | 
			
		||||
    smulwb  r9, r3, r12 ; 12c = (ip[12] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwb  r10, r4, r12    ; 12s = (ip[12] * sinpi8sqrt2) >> 16
 | 
			
		||||
    subs    r5, r5, #0x1    ; i--; these flags feed the bne at the loop end
 | 
			
		||||
    pkhbt   r9, r9, r11, lsl #16    ; 13c | 12c
 | 
			
		||||
    ldr r11, [r0], #0x4 ; i1 | i0, then advance input by two coefficients
 | 
			
		||||
    pkhbt   r10, r10, r7, lsl #16   ; 13s | 12s
 | 
			
		||||
    uadd16  r7, r12, r9 ; 13c+13 | 12c+12
 | 
			
		||||
    usub16  r7, r8, r7  ; c
 | 
			
		||||
    uadd16  r6, r6, r10 ; d
 | 
			
		||||
    uadd16  r10, r11, r14   ; a
 | 
			
		||||
    usub16  r8, r11, r14    ; b
 | 
			
		||||
    uadd16  r9, r10, r6 ; a+d
 | 
			
		||||
    usub16  r10, r10, r6    ; a-d
 | 
			
		||||
    uadd16  r6, r8, r7  ; b+c
 | 
			
		||||
    usub16  r7, r8, r7  ; b-c
 | 
			
		||||
    str r6, [r1, r2]    ; o5 | o4   (output row 1)
 | 
			
		||||
    add r6, r2, r2  ; pitch * 2
 | 
			
		||||
    str r7, [r1, r6]    ; o9 | o8   (output row 2)
 | 
			
		||||
    add r6,  r6, r2 ; pitch * 3
 | 
			
		||||
    str r10, [r1, r6]   ; o13 | o12 (output row 3)
 | 
			
		||||
    str r9, [r1], #0x4  ; o1 | o0   (output row 0), then advance output
 | 
			
		||||
    bne loop1_dual_11   ; repeat for the second pair of columns
 | 
			
		||||
    mov r5, #0x2    ; i = 2 iterations, two rows each
 | 
			
		||||
    sub r0, r1, #8  ; r0 = start of output; second pass runs in place
 | 
			
		||||
; Second pass: each iteration transforms two adjacent output rows, adds the
; rounding constant 4 and shifts right by 3. r1 is reused as scratch here.
loop2_dual_22
 | 
			
		||||
    ldr r6, [r0, r2]    ; i5 | i4 (next row)
 | 
			
		||||
    ldr r1, [r0]    ; i1 | i0 (current row)
 | 
			
		||||
    ldr r12, [r0, #0x4] ; i3 | i2 (current row)
 | 
			
		||||
    add r14, r2, #0x4   ; pitch + 4 bytes (two shorts into the next row)
 | 
			
		||||
    ldr r14, [r0, r14]  ; i7 | i6 (next row)
 | 
			
		||||
    smulwt  r9, r3, r6  ; 5c = (ip[5] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwt  r7, r3, r1  ; 1c = (ip[1] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwt  r10, r4, r6 ; 5s = (ip[5] * sinpi8sqrt2) >> 16
 | 
			
		||||
    smulwt  r8, r4, r1  ; 1s = (ip[1] * sinpi8sqrt2) >> 16
 | 
			
		||||
    pkhbt   r11, r6, r1, lsl #16    ; i0 | i4
 | 
			
		||||
    pkhbt   r7, r9, r7, lsl #16 ; 1c | 5c
 | 
			
		||||
    pkhbt   r8, r10, r8, lsl #16    ; 1s | 5s = temp1 (c)
 | 
			
		||||
    pkhtb   r1, r1, r6, asr #16 ; i1 | i5
 | 
			
		||||
    uadd16  r1, r7, r1  ; 1c+1 | 5c+5 = temp2 (d)
 | 
			
		||||
    pkhbt   r9, r14, r12, lsl #16   ; i2 | i6
 | 
			
		||||
    uadd16  r10, r11, r9    ; a
 | 
			
		||||
    usub16  r9, r11, r9 ; b
 | 
			
		||||
    pkhtb   r6, r12, r14, asr #16   ; i3 | i7
 | 
			
		||||
    subs    r5, r5, #0x1    ; i--; these flags feed the bne at the loop end
 | 
			
		||||
    smulwt  r7, r3, r6  ; 3c = (ip[3] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwt  r11, r4, r6 ; 3s = (ip[3] * sinpi8sqrt2) >> 16
 | 
			
		||||
    smulwb  r12, r3, r6 ; 7c = (ip[7] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwb  r14, r4, r6 ; 7s = (ip[7] * sinpi8sqrt2) >> 16
 | 
			
		||||
 | 
			
		||||
    pkhbt   r7, r12, r7, lsl #16    ; 3c | 7c
 | 
			
		||||
    pkhbt   r11, r14, r11, lsl #16  ; 3s | 7s = temp1 (d)
 | 
			
		||||
    uadd16  r6, r7, r6  ; 3c+3 | 7c+7 = temp2 (c)
 | 
			
		||||
    usub16  r12, r8, r6 ; c
 | 
			
		||||
    uadd16  r6, r11, r1 ; d
 | 
			
		||||
    uadd16  r7, r10, r6 ; a+d
 | 
			
		||||
    mov r8, #0x4    ; rounding constant 4 in the low half
 | 
			
		||||
    orr r8, r8, #0x40000    ; r8 = 4 | 4 (both halves)
 | 
			
		||||
    usub16  r6, r10, r6 ; a-d
 | 
			
		||||
    uadd16  r6, r6, r8  ; a-d+4  -> o3 | o7
 | 
			
		||||
    uadd16  r7, r7, r8  ; a+d+4  -> o0 | o4
 | 
			
		||||
    uadd16  r10, r9, r12    ; b+c
 | 
			
		||||
    usub16  r1, r9, r12 ; b-c
 | 
			
		||||
    uadd16  r10, r10, r8    ; b+c+4  -> o1 | o5
 | 
			
		||||
    uadd16  r1, r1, r8  ; b-c+4  -> o2 | o6
 | 
			
		||||
; Top halves (current row): asr #19 = drop the low half, then >> 3.
    mov r8, r10, asr #19    ; o1 >> 3
 | 
			
		||||
    strh    r8, [r0, #2]    ; o1
 | 
			
		||||
    mov r8, r1, asr #19 ; o2 >> 3
 | 
			
		||||
    strh    r8, [r0, #4]    ; o2
 | 
			
		||||
    mov r8, r6, asr #19 ; o3 >> 3
 | 
			
		||||
    strh    r8, [r0, #6]    ; o3
 | 
			
		||||
    mov r8, r7, asr #19 ; o0 >> 3
 | 
			
		||||
    strh    r8, [r0], r2    ; o0, then step to the next row (+pitch)
 | 
			
		||||
; Bottom halves (next row): sign-extend the low half, then >> 3.
    sxth    r10, r10    ; sign-extend o5
 | 
			
		||||
    mov r8, r10, asr #3 ; o5 >> 3
 | 
			
		||||
    strh    r8, [r0, #2]    ; o5
 | 
			
		||||
    sxth    r1, r1  ; sign-extend o6
 | 
			
		||||
    mov r8, r1, asr #3  ; o6 >> 3
 | 
			
		||||
    strh    r8, [r0, #4]    ; o6
 | 
			
		||||
    sxth    r6, r6  ; sign-extend o7
 | 
			
		||||
    mov r8, r6, asr #3  ; o7 >> 3
 | 
			
		||||
    strh    r8, [r0, #6]    ; o7
 | 
			
		||||
    sxth    r7, r7  ; sign-extend o4
 | 
			
		||||
    mov r8, r7, asr #3  ; o4 >> 3
 | 
			
		||||
    strh    r8, [r0], r2    ; o4, then step to the next row (+pitch)
 | 
			
		||||
; (disabled) the decrement is done by the subs earlier in the loop body:
;;;;;   subs    r5, r5, #0x1    ; i--
 | 
			
		||||
    bne loop2_dual_22   ; repeat for the second pair of rows
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
;vpx_memset
; Zero the 16 input coefficients (8 words) through the saved input pointer.
 | 
			
		||||
    ldr     r0, [sp]
 | 
			
		||||
    add     sp, sp, #4
 | 
			
		||||
 | 
			
		||||
    mov     r12, #0
 | 
			
		||||
    str     r12, [r0]
 | 
			
		||||
    str     r12, [r0, #4]
 | 
			
		||||
    str     r12, [r0, #8]
 | 
			
		||||
    str     r12, [r0, #12]
 | 
			
		||||
    str     r12, [r0, #16]
 | 
			
		||||
    str     r12, [r0, #20]
 | 
			
		||||
    str     r12, [r0, #24]
 | 
			
		||||
    str     r12, [r0, #28]
 | 
			
		||||
 | 
			
		||||
    ldmia   sp!, {r4 - r11, pc} ; restore saved registers and return
 | 
			
		||||
 | 
			
		||||
    ENDP    ;|vp8_dequant_dc_idct_v6|
 | 
			
		||||
 | 
			
		||||
    END
 | 
			
		||||
@@ -1,184 +0,0 @@
 | 
			
		||||
;
 | 
			
		||||
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 | 
			
		||||
;
 | 
			
		||||
;  Use of this source code is governed by a BSD-style license
 | 
			
		||||
;  that can be found in the LICENSE file in the root of the source
 | 
			
		||||
;  tree. An additional intellectual property rights grant can be found
 | 
			
		||||
;  in the file PATENTS.  All contributing project authors may
 | 
			
		||||
;  be found in the AUTHORS file in the root of the source tree.
 | 
			
		||||
;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    EXPORT  |vp8_dequant_idct_v6|
 | 
			
		||||
    ; ARM
 | 
			
		||||
    ; REQUIRE8
 | 
			
		||||
    ; PRESERVE8
 | 
			
		||||
 | 
			
		||||
    AREA    |.text|, CODE, READONLY  ; name this block of code
 | 
			
		||||
; vp8_dequant_idct_v6
;   1. Dequantizes a 4x4 block in place: input[i] = input[i] * dq[i] for
;      i = 0..15 (low 16 bits of each product are kept).
;   2. Runs a two-pass 4x4 inverse DCT: the first pass reads the dequantized
;      coefficients and writes 16-bit intermediates to output, the second pass
;      works in place on output and applies the final (x + 4) >> 3.
;   3. Clears the 32-byte input block before returning.
;
;   r0      short *input    16 coefficients; overwritten, then zeroed
;   r1      short *dq       16 dequantization factors
;   r2      short *output   receives 4 rows of 4 shorts
;   r3      int pitch       used as the byte offset between output rows
;
;   r4-r11 and lr are saved on entry and restored on exit; r12 is scratch.
;void vp8_dequant_idct_v6(short *input, short *dq, short *output, int pitch)
 | 
			
		||||
|vp8_dequant_idct_v6| PROC
 | 
			
		||||
    stmdb   sp!, {r4-r11, lr}
 | 
			
		||||
 | 
			
		||||
    ldr     r4, [r0]            ; r4 = input[1] | input[0]
 | 
			
		||||
    ldr     r5, [r1], #4            ; r5 = dq[1] | dq[0], dq += 2
 | 
			
		||||
 | 
			
		||||
; Keep the original input pointer on the stack; it is reloaded for the
; final clear of the input block.
    sub     sp, sp, #4
 | 
			
		||||
    str     r0, [sp]
 | 
			
		||||
 | 
			
		||||
; 4 iterations of 4 coefficients each.
    mov     r12, #4
 | 
			
		||||
 | 
			
		||||
dequant_idct_loop
 | 
			
		||||
    smulbb  r6, r4, r5
 | 
			
		||||
    smultt  r7, r4, r5
 | 
			
		||||
 | 
			
		||||
; The next input/dq pair is loaded before the current pair is stored.
    ldr     r4, [r0, #4]            ; next two input coefficients
 | 
			
		||||
    ldr     r5, [r1], #4        ; next two dq factors
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2
 | 
			
		||||
    strh    r7, [r0], #2
 | 
			
		||||
 | 
			
		||||
    smulbb  r6, r4, r5
 | 
			
		||||
    smultt  r7, r4, r5
 | 
			
		||||
 | 
			
		||||
    subs    r12, r12, #1
 | 
			
		||||
 | 
			
		||||
; Conditional loads: on the last iteration there is no further pair to fetch.
    ldrne   r4, [r0, #4]
 | 
			
		||||
    ldrne   r5, [r1], #4
 | 
			
		||||
 | 
			
		||||
    strh    r6, [r0], #2
 | 
			
		||||
    strh    r7, [r0], #2
 | 
			
		||||
 | 
			
		||||
    bne     dequant_idct_loop
 | 
			
		||||
 | 
			
		||||
; Rewind r0 to the start of the block and move output/pitch into r1/r2.
    sub     r0, r0, #32
 | 
			
		||||
    mov     r1, r2
 | 
			
		||||
    mov     r2, r3
 | 
			
		||||
 | 
			
		||||
; short_idct4x4llm_v6_dual
; Inverse DCT working on two packed 16-bit values per register.
 | 
			
		||||
 | 
			
		||||
    mov r3, #0x00004E00 ; r3 = 0x4E7B, built in two steps
 | 
			
		||||
    orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
 | 
			
		||||
    mov r4, #0x00008A00 ; r4 = 0x8A8C, built in two steps
 | 
			
		||||
    orr r4, r4, #0x0000008C ; sinpi8sqrt2
 | 
			
		||||
    mov r5, #0x2    ; i = 2 iterations, two columns each
 | 
			
		||||
; First pass: each iteration reads one packed pair from each of the four
; input rows and writes one packed pair to each of the four output rows.
loop1_dual_1
 | 
			
		||||
    ldr r6, [r0, #(4*2)]    ; i5 | i4
 | 
			
		||||
    ldr r12, [r0, #(12*2)]  ; i13 | i12
 | 
			
		||||
    ldr r14, [r0, #(8*2)]   ; i9 | i8
 | 
			
		||||
 | 
			
		||||
    smulwt  r9, r3, r6  ; 5c = (ip[5] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwb  r7, r3, r6  ; 4c = (ip[4] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwt  r10, r4, r6 ; 5s = (ip[5] * sinpi8sqrt2) >> 16
 | 
			
		||||
    smulwb  r8, r4, r6  ; 4s = (ip[4] * sinpi8sqrt2) >> 16
 | 
			
		||||
    pkhbt   r7, r7, r9, lsl #16 ; 5c | 4c
 | 
			
		||||
    smulwt  r11, r3, r12    ; 13c = (ip[13] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    pkhbt   r8, r8, r10, lsl #16    ; 5s | 4s
 | 
			
		||||
    uadd16  r6, r6, r7  ; 5c+5 | 4c+4
 | 
			
		||||
    smulwt  r7, r4, r12 ; 13s = (ip[13] * sinpi8sqrt2) >> 16
 | 
			
		||||
    smulwb  r9, r3, r12 ; 12c = (ip[12] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwb  r10, r4, r12    ; 12s = (ip[12] * sinpi8sqrt2) >> 16
 | 
			
		||||
    subs    r5, r5, #0x1    ; i--; these flags feed the bne at the loop end
 | 
			
		||||
    pkhbt   r9, r9, r11, lsl #16    ; 13c | 12c
 | 
			
		||||
    ldr r11, [r0], #0x4 ; i1 | i0, then advance input by two coefficients
 | 
			
		||||
    pkhbt   r10, r10, r7, lsl #16   ; 13s | 12s
 | 
			
		||||
    uadd16  r7, r12, r9 ; 13c+13 | 12c+12
 | 
			
		||||
    usub16  r7, r8, r7  ; c
 | 
			
		||||
    uadd16  r6, r6, r10 ; d
 | 
			
		||||
    uadd16  r10, r11, r14   ; a
 | 
			
		||||
    usub16  r8, r11, r14    ; b
 | 
			
		||||
    uadd16  r9, r10, r6 ; a+d
 | 
			
		||||
    usub16  r10, r10, r6    ; a-d
 | 
			
		||||
    uadd16  r6, r8, r7  ; b+c
 | 
			
		||||
    usub16  r7, r8, r7  ; b-c
 | 
			
		||||
    str r6, [r1, r2]    ; o5 | o4   (output row 1)
 | 
			
		||||
    add r6, r2, r2  ; pitch * 2
 | 
			
		||||
    str r7, [r1, r6]    ; o9 | o8   (output row 2)
 | 
			
		||||
    add r6,  r6, r2 ; pitch * 3
 | 
			
		||||
    str r10, [r1, r6]   ; o13 | o12 (output row 3)
 | 
			
		||||
    str r9, [r1], #0x4  ; o1 | o0   (output row 0), then advance output
 | 
			
		||||
    bne loop1_dual_1    ; repeat for the second pair of columns
 | 
			
		||||
    mov r5, #0x2    ; i = 2 iterations, two rows each
 | 
			
		||||
    sub r0, r1, #8  ; r0 = start of output; second pass runs in place
 | 
			
		||||
; Second pass: each iteration transforms two adjacent output rows, adds the
; rounding constant 4 and shifts right by 3. r1 is reused as scratch here.
loop2_dual_2
 | 
			
		||||
    ldr r6, [r0, r2]    ; i5 | i4 (next row)
 | 
			
		||||
    ldr r1, [r0]    ; i1 | i0 (current row)
 | 
			
		||||
    ldr r12, [r0, #0x4] ; i3 | i2 (current row)
 | 
			
		||||
    add r14, r2, #0x4   ; pitch + 4 bytes (two shorts into the next row)
 | 
			
		||||
    ldr r14, [r0, r14]  ; i7 | i6 (next row)
 | 
			
		||||
    smulwt  r9, r3, r6  ; 5c = (ip[5] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwt  r7, r3, r1  ; 1c = (ip[1] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwt  r10, r4, r6 ; 5s = (ip[5] * sinpi8sqrt2) >> 16
 | 
			
		||||
    smulwt  r8, r4, r1  ; 1s = (ip[1] * sinpi8sqrt2) >> 16
 | 
			
		||||
    pkhbt   r11, r6, r1, lsl #16    ; i0 | i4
 | 
			
		||||
    pkhbt   r7, r9, r7, lsl #16 ; 1c | 5c
 | 
			
		||||
    pkhbt   r8, r10, r8, lsl #16    ; 1s | 5s = temp1 (c)
 | 
			
		||||
    pkhtb   r1, r1, r6, asr #16 ; i1 | i5
 | 
			
		||||
    uadd16  r1, r7, r1  ; 1c+1 | 5c+5 = temp2 (d)
 | 
			
		||||
    pkhbt   r9, r14, r12, lsl #16   ; i2 | i6
 | 
			
		||||
    uadd16  r10, r11, r9    ; a
 | 
			
		||||
    usub16  r9, r11, r9 ; b
 | 
			
		||||
    pkhtb   r6, r12, r14, asr #16   ; i3 | i7
 | 
			
		||||
    subs    r5, r5, #0x1    ; i--; these flags feed the bne at the loop end
 | 
			
		||||
    smulwt  r7, r3, r6  ; 3c = (ip[3] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwt  r11, r4, r6 ; 3s = (ip[3] * sinpi8sqrt2) >> 16
 | 
			
		||||
    smulwb  r12, r3, r6 ; 7c = (ip[7] * cospi8sqrt2minus1) >> 16
 | 
			
		||||
    smulwb  r14, r4, r6 ; 7s = (ip[7] * sinpi8sqrt2) >> 16
 | 
			
		||||
 | 
			
		||||
    pkhbt   r7, r12, r7, lsl #16    ; 3c | 7c
 | 
			
		||||
    pkhbt   r11, r14, r11, lsl #16  ; 3s | 7s = temp1 (d)
 | 
			
		||||
    uadd16  r6, r7, r6  ; 3c+3 | 7c+7 = temp2 (c)
 | 
			
		||||
    usub16  r12, r8, r6 ; c
 | 
			
		||||
    uadd16  r6, r11, r1 ; d
 | 
			
		||||
    uadd16  r7, r10, r6 ; a+d
 | 
			
		||||
    mov r8, #0x4    ; rounding constant 4 in the low half
 | 
			
		||||
    orr r8, r8, #0x40000    ; r8 = 4 | 4 (both halves)
 | 
			
		||||
    usub16  r6, r10, r6 ; a-d
 | 
			
		||||
    uadd16  r6, r6, r8  ; a-d+4  -> o3 | o7
 | 
			
		||||
    uadd16  r7, r7, r8  ; a+d+4  -> o0 | o4
 | 
			
		||||
    uadd16  r10, r9, r12    ; b+c
 | 
			
		||||
    usub16  r1, r9, r12 ; b-c
 | 
			
		||||
    uadd16  r10, r10, r8    ; b+c+4  -> o1 | o5
 | 
			
		||||
    uadd16  r1, r1, r8  ; b-c+4  -> o2 | o6
 | 
			
		||||
; Top halves (current row): asr #19 = drop the low half, then >> 3.
    mov r8, r10, asr #19    ; o1 >> 3
 | 
			
		||||
    strh    r8, [r0, #2]    ; o1
 | 
			
		||||
    mov r8, r1, asr #19 ; o2 >> 3
 | 
			
		||||
    strh    r8, [r0, #4]    ; o2
 | 
			
		||||
    mov r8, r6, asr #19 ; o3 >> 3
 | 
			
		||||
    strh    r8, [r0, #6]    ; o3
 | 
			
		||||
    mov r8, r7, asr #19 ; o0 >> 3
 | 
			
		||||
    strh    r8, [r0], r2    ; o0, then step to the next row (+pitch)
 | 
			
		||||
; Bottom halves (next row): sign-extend the low half, then >> 3.
    sxth    r10, r10    ; sign-extend o5
 | 
			
		||||
    mov r8, r10, asr #3 ; o5 >> 3
 | 
			
		||||
    strh    r8, [r0, #2]    ; o5
 | 
			
		||||
    sxth    r1, r1  ; sign-extend o6
 | 
			
		||||
    mov r8, r1, asr #3  ; o6 >> 3
 | 
			
		||||
    strh    r8, [r0, #4]    ; o6
 | 
			
		||||
    sxth    r6, r6  ; sign-extend o7
 | 
			
		||||
    mov r8, r6, asr #3  ; o7 >> 3
 | 
			
		||||
    strh    r8, [r0, #6]    ; o7
 | 
			
		||||
    sxth    r7, r7  ; sign-extend o4
 | 
			
		||||
    mov r8, r7, asr #3  ; o4 >> 3
 | 
			
		||||
    strh    r8, [r0], r2    ; o4, then step to the next row (+pitch)
 | 
			
		||||
; (disabled) the decrement is done by the subs earlier in the loop body:
;;;;;   subs    r5, r5, #0x1    ; i--
 | 
			
		||||
    bne loop2_dual_2    ; repeat for the second pair of rows
 | 
			
		||||
            ;
 | 
			
		||||
 | 
			
		||||
;vpx_memset
; Zero the 16 input coefficients (8 words) through the saved input pointer.
 | 
			
		||||
    ldr     r0, [sp]
 | 
			
		||||
    add     sp, sp, #4
 | 
			
		||||
 | 
			
		||||
    mov     r12, #0
 | 
			
		||||
    str     r12, [r0]
 | 
			
		||||
    str     r12, [r0, #4]
 | 
			
		||||
    str     r12, [r0, #8]
 | 
			
		||||
    str     r12, [r0, #12]
 | 
			
		||||
    str     r12, [r0, #16]
 | 
			
		||||
    str     r12, [r0, #20]
 | 
			
		||||
    str     r12, [r0, #24]
 | 
			
		||||
    str     r12, [r0, #28]
 | 
			
		||||
 | 
			
		||||
    ldmia   sp!, {r4 - r11, pc} ; restore saved registers and return
 | 
			
		||||
 | 
			
		||||
    ENDP    ;|vp8_dequant_idct_v6|
 | 
			
		||||
 | 
			
		||||
    END
 | 
			
		||||
@@ -14,14 +14,32 @@
 | 
			
		||||
 | 
			
		||||
#if HAVE_ARMV6
 | 
			
		||||
extern prototype_dequant_block(vp8_dequantize_b_v6);
 | 
			
		||||
extern prototype_dequant_idct_add(vp8_dequant_idct_add_v6);
 | 
			
		||||
extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_v6);
 | 
			
		||||
 | 
			
		||||
#undef  vp8_dequant_block
 | 
			
		||||
#define vp8_dequant_block vp8_dequantize_b_v6
 | 
			
		||||
 | 
			
		||||
#undef vp8_dequant_idct_add
 | 
			
		||||
#define vp8_dequant_idct_add vp8_dequant_idct_add_v6
 | 
			
		||||
 | 
			
		||||
#undef vp8_dequant_dc_idct_add
 | 
			
		||||
#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_v6
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if HAVE_ARMV7
 | 
			
		||||
extern prototype_dequant_block(vp8_dequantize_b_neon);
 | 
			
		||||
extern prototype_dequant_idct_add(vp8_dequant_idct_add_neon);
 | 
			
		||||
extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_neon);
 | 
			
		||||
 | 
			
		||||
#undef  vp8_dequant_block
 | 
			
		||||
#define vp8_dequant_block vp8_dequantize_b_neon
 | 
			
		||||
 | 
			
		||||
#undef vp8_dequant_idct_add
 | 
			
		||||
#define vp8_dequant_idct_add vp8_dequant_idct_add_neon
 | 
			
		||||
 | 
			
		||||
#undef vp8_dequant_dc_idct_add
 | 
			
		||||
#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_neon
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -9,31 +9,43 @@
 | 
			
		||||
;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    EXPORT  |vp8_dequant_dc_idct_neon|
 | 
			
		||||
    EXPORT  |vp8_dequant_dc_idct_add_neon|
 | 
			
		||||
    ARM
 | 
			
		||||
    REQUIRE8
 | 
			
		||||
    PRESERVE8
 | 
			
		||||
 | 
			
		||||
    AREA ||.text||, CODE, READONLY, ALIGN=2
 | 
			
		||||
;void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc);
 | 
			
		||||
;void vp8_dequant_dc_idct_add_neon(short *input, short *dq, unsigned char *pred,
 | 
			
		||||
;                                  unsigned char *dest, int pitch, int stride,
 | 
			
		||||
;                                  int Dc);
 | 
			
		||||
; r0    short *input,
 | 
			
		||||
; r1    short *dq,
 | 
			
		||||
; r2    short *output,
 | 
			
		||||
; r3    int pitch,
 | 
			
		||||
; (stack)   int Dc
 | 
			
		||||
|vp8_dequant_dc_idct_neon| PROC
 | 
			
		||||
; r2    unsigned char *pred
 | 
			
		||||
; r3    unsigned char *dest
 | 
			
		||||
; sp    int pitch
 | 
			
		||||
; sp+4  int stride
 | 
			
		||||
; sp+8  int Dc
 | 
			
		||||
|vp8_dequant_dc_idct_add_neon| PROC
 | 
			
		||||
    vld1.16         {q3, q4}, [r0]
 | 
			
		||||
    vld1.16         {q5, q6}, [r1]
 | 
			
		||||
 | 
			
		||||
    ldr             r1, [sp]                ;load Dc from stack
 | 
			
		||||
    ldr             r1, [sp, #8]            ;load Dc from stack
 | 
			
		||||
 | 
			
		||||
    ldr             r12, _dcidct_coeff_
 | 
			
		||||
    ldr             r12, _CONSTANTS_
 | 
			
		||||
 | 
			
		||||
    vmul.i16        q1, q3, q5              ;input for short_idct4x4llm_neon
 | 
			
		||||
    vmul.i16        q2, q4, q6
 | 
			
		||||
 | 
			
		||||
    vmov.16         d2[0], r1
 | 
			
		||||
 | 
			
		||||
    ldr             r1, [sp]                ; pitch
 | 
			
		||||
    vld1.32         {d14[0]}, [r2], r1
 | 
			
		||||
    vld1.32         {d14[1]}, [r2], r1
 | 
			
		||||
    vld1.32         {d15[0]}, [r2], r1
 | 
			
		||||
    vld1.32         {d15[1]}, [r2]
 | 
			
		||||
 | 
			
		||||
    ldr             r1, [sp, #4]            ; stride
 | 
			
		||||
 | 
			
		||||
;|short_idct4x4llm_neon| PROC
 | 
			
		||||
    vld1.16         {d0}, [r12]
 | 
			
		||||
    vswp            d3, d4                  ;q2(vp[4] vp[12])
 | 
			
		||||
@@ -47,14 +59,9 @@
 | 
			
		||||
    vshr.s16        q3, q3, #1
 | 
			
		||||
    vshr.s16        q4, q4, #1
 | 
			
		||||
 | 
			
		||||
    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
 | 
			
		||||
    vqadd.s16       q3, q3, q2
 | 
			
		||||
    vqadd.s16       q4, q4, q2
 | 
			
		||||
 | 
			
		||||
    ;d6 - c1:temp1
 | 
			
		||||
    ;d7 - d1:temp2
 | 
			
		||||
    ;d8 - d1:temp1
 | 
			
		||||
    ;d9 - c1:temp2
 | 
			
		||||
 | 
			
		||||
    vqsub.s16       d10, d6, d9             ;c1
 | 
			
		||||
    vqadd.s16       d11, d7, d8             ;d1
 | 
			
		||||
 | 
			
		||||
@@ -83,7 +90,7 @@
 | 
			
		||||
    vshr.s16        q3, q3, #1
 | 
			
		||||
    vshr.s16        q4, q4, #1
 | 
			
		||||
 | 
			
		||||
    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
 | 
			
		||||
    vqadd.s16       q3, q3, q2
 | 
			
		||||
    vqadd.s16       q4, q4, q2
 | 
			
		||||
 | 
			
		||||
    vqsub.s16       d10, d6, d9             ;c1
 | 
			
		||||
@@ -101,34 +108,29 @@
 | 
			
		||||
    vrshr.s16       d4, d4, #3
 | 
			
		||||
    vrshr.s16       d5, d5, #3
 | 
			
		||||
 | 
			
		||||
    add             r1, r2, r3
 | 
			
		||||
    add             r12, r1, r3
 | 
			
		||||
    add             r0, r12, r3
 | 
			
		||||
 | 
			
		||||
    vtrn.32         d2, d4
 | 
			
		||||
    vtrn.32         d3, d5
 | 
			
		||||
    vtrn.16         d2, d3
 | 
			
		||||
    vtrn.16         d4, d5
 | 
			
		||||
 | 
			
		||||
    vst1.16         {d2}, [r2]
 | 
			
		||||
    vst1.16         {d3}, [r1]
 | 
			
		||||
    vst1.16         {d4}, [r12]
 | 
			
		||||
    vst1.16         {d5}, [r0]
 | 
			
		||||
    vaddw.u8        q1, q1, d14
 | 
			
		||||
    vaddw.u8        q2, q2, d15
 | 
			
		||||
 | 
			
		||||
    vqmovun.s16     d0, q1
 | 
			
		||||
    vqmovun.s16     d1, q2
 | 
			
		||||
 | 
			
		||||
    vst1.32         {d0[0]}, [r3], r1
 | 
			
		||||
    vst1.32         {d0[1]}, [r3], r1
 | 
			
		||||
    vst1.32         {d1[0]}, [r3], r1
 | 
			
		||||
    vst1.32         {d1[1]}, [r3]
 | 
			
		||||
 | 
			
		||||
    bx             lr
 | 
			
		||||
 | 
			
		||||
    ENDP
 | 
			
		||||
    ENDP           ; |vp8_dequant_dc_idct_add_neon|
 | 
			
		||||
 | 
			
		||||
;-----------------
 | 
			
		||||
    AREA    dcidct4x4_dat, DATA, READWRITE          ;read/write by default
 | 
			
		||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
 | 
			
		||||
;One word each is reserved. Label filter_coeff can be used to access the data.
 | 
			
		||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
 | 
			
		||||
_dcidct_coeff_
 | 
			
		||||
    DCD     dcidct_coeff
 | 
			
		||||
dcidct_coeff
 | 
			
		||||
    DCD     0x4e7b4e7b, 0x8a8c8a8c
 | 
			
		||||
 | 
			
		||||
;20091, 20091, 35468, 35468
 | 
			
		||||
; Constant Pool
 | 
			
		||||
_CONSTANTS_       DCD cospi8sqrt2minus1
 | 
			
		||||
cospi8sqrt2minus1 DCD 0x4e7b4e7b
 | 
			
		||||
sinpi8sqrt2       DCD 0x8a8c8a8c
 | 
			
		||||
 | 
			
		||||
    END
 | 
			
		||||
@@ -9,22 +9,33 @@
 | 
			
		||||
;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    EXPORT  |vp8_dequant_idct_neon|
 | 
			
		||||
    EXPORT  |vp8_dequant_idct_add_neon|
 | 
			
		||||
    ARM
 | 
			
		||||
    REQUIRE8
 | 
			
		||||
    PRESERVE8
 | 
			
		||||
 | 
			
		||||
    AREA ||.text||, CODE, READONLY, ALIGN=2
 | 
			
		||||
;void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch);
 | 
			
		||||
;void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *pred,
 | 
			
		||||
;                               unsigned char *dest, int pitch, int stride)
 | 
			
		||||
; r0    short *input,
 | 
			
		||||
; r1    short *dq,
 | 
			
		||||
; r2    short *output,
 | 
			
		||||
; r3    int pitch,
 | 
			
		||||
|vp8_dequant_idct_neon| PROC
 | 
			
		||||
; r2    unsigned char *pred
 | 
			
		||||
; r3    unsigned char *dest
 | 
			
		||||
; sp    int pitch
 | 
			
		||||
; sp+4  int stride
 | 
			
		||||
 | 
			
		||||
|vp8_dequant_idct_add_neon| PROC
 | 
			
		||||
    vld1.16         {q3, q4}, [r0]
 | 
			
		||||
    vld1.16         {q5, q6}, [r1]
 | 
			
		||||
    ldr             r1, [sp]                ; pitch
 | 
			
		||||
    vld1.32         {d14[0]}, [r2], r1
 | 
			
		||||
    vld1.32         {d14[1]}, [r2], r1
 | 
			
		||||
    vld1.32         {d15[0]}, [r2], r1
 | 
			
		||||
    vld1.32         {d15[1]}, [r2]
 | 
			
		||||
 | 
			
		||||
    ldr             r12, _didct_coeff_
 | 
			
		||||
    ldr             r1, [sp, #4]            ; stride
 | 
			
		||||
 | 
			
		||||
    ldr             r12, _CONSTANTS_
 | 
			
		||||
 | 
			
		||||
    vmul.i16        q1, q3, q5              ;input for short_idct4x4llm_neon
 | 
			
		||||
    vmul.i16        q2, q4, q6
 | 
			
		||||
@@ -42,14 +53,9 @@
 | 
			
		||||
    vshr.s16        q3, q3, #1
 | 
			
		||||
    vshr.s16        q4, q4, #1
 | 
			
		||||
 | 
			
		||||
    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
 | 
			
		||||
    vqadd.s16       q3, q3, q2
 | 
			
		||||
    vqadd.s16       q4, q4, q2
 | 
			
		||||
 | 
			
		||||
    ;d6 - c1:temp1
 | 
			
		||||
    ;d7 - d1:temp2
 | 
			
		||||
    ;d8 - d1:temp1
 | 
			
		||||
    ;d9 - c1:temp2
 | 
			
		||||
 | 
			
		||||
    vqsub.s16       d10, d6, d9             ;c1
 | 
			
		||||
    vqadd.s16       d11, d7, d8             ;d1
 | 
			
		||||
 | 
			
		||||
@@ -78,7 +84,7 @@
 | 
			
		||||
    vshr.s16        q3, q3, #1
 | 
			
		||||
    vshr.s16        q4, q4, #1
 | 
			
		||||
 | 
			
		||||
    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negative number)
 | 
			
		||||
    vqadd.s16       q3, q3, q2
 | 
			
		||||
    vqadd.s16       q4, q4, q2
 | 
			
		||||
 | 
			
		||||
    vqsub.s16       d10, d6, d9             ;c1
 | 
			
		||||
@@ -96,34 +102,29 @@
 | 
			
		||||
    vrshr.s16       d4, d4, #3
 | 
			
		||||
    vrshr.s16       d5, d5, #3
 | 
			
		||||
 | 
			
		||||
    add             r1, r2, r3
 | 
			
		||||
    add             r12, r1, r3
 | 
			
		||||
    add             r0, r12, r3
 | 
			
		||||
 | 
			
		||||
    vtrn.32         d2, d4
 | 
			
		||||
    vtrn.32         d3, d5
 | 
			
		||||
    vtrn.16         d2, d3
 | 
			
		||||
    vtrn.16         d4, d5
 | 
			
		||||
 | 
			
		||||
    vst1.16         {d2}, [r2]
 | 
			
		||||
    vst1.16         {d3}, [r1]
 | 
			
		||||
    vst1.16         {d4}, [r12]
 | 
			
		||||
    vst1.16         {d5}, [r0]
 | 
			
		||||
    vaddw.u8        q1, q1, d14
 | 
			
		||||
    vaddw.u8        q2, q2, d15
 | 
			
		||||
 | 
			
		||||
    vqmovun.s16     d0, q1
 | 
			
		||||
    vqmovun.s16     d1, q2
 | 
			
		||||
 | 
			
		||||
    vst1.32         {d0[0]}, [r3], r1
 | 
			
		||||
    vst1.32         {d0[1]}, [r3], r1
 | 
			
		||||
    vst1.32         {d1[0]}, [r3], r1
 | 
			
		||||
    vst1.32         {d1[1]}, [r3]
 | 
			
		||||
 | 
			
		||||
    bx             lr
 | 
			
		||||
 | 
			
		||||
    ENDP
 | 
			
		||||
    ENDP           ; |vp8_dequant_idct_add_neon|
 | 
			
		||||
 | 
			
		||||
;-----------------
 | 
			
		||||
    AREA    didct4x4_dat, DATA, READWRITE           ;read/write by default
 | 
			
		||||
;Data section with name didct4x4_dat is specified. DCD reserves space in memory for the coefficient data.
 | 
			
		||||
;One word each is reserved. Label didct_coeff can be used to access the data.
 | 
			
		||||
;Data address: didct_coeff, didct_coeff+4
 | 
			
		||||
_didct_coeff_
 | 
			
		||||
    DCD     didct_coeff
 | 
			
		||||
didct_coeff
 | 
			
		||||
    DCD     0x4e7b4e7b, 0x8a8c8a8c
 | 
			
		||||
 | 
			
		||||
;20091, 20091, 35468, 35468
 | 
			
		||||
; Constant Pool
 | 
			
		||||
_CONSTANTS_       DCD cospi8sqrt2minus1
 | 
			
		||||
cospi8sqrt2minus1 DCD 0x4e7b4e7b
 | 
			
		||||
sinpi8sqrt2       DCD 0x8a8c8a8c
 | 
			
		||||
 | 
			
		||||
    END
 | 
			
		||||
@@ -272,7 +272,9 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
 | 
			
		||||
 | 
			
		||||
            if (b->eob > 1)
 | 
			
		||||
            {
 | 
			
		||||
                DEQUANT_INVOKE(&pbi->dequant, idct_dc_add)(b->qcoeff, &b->dequant[0][0],  b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride,
 | 
			
		||||
                DEQUANT_INVOKE(&pbi->dequant, dc_idct_add)
 | 
			
		||||
                    (b->qcoeff, &b->dequant[0][0], b->predictor,
 | 
			
		||||
                     *(b->base_dst) + b->dst, 16, b->dst_stride,
 | 
			
		||||
                     xd->block[24].diff[i]);
 | 
			
		||||
            }
 | 
			
		||||
            else
 | 
			
		||||
 
 | 
			
		||||
@@ -32,10 +32,10 @@ void vp8_dequantize_b_c(BLOCKD *d)
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride)
 | 
			
		||||
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
 | 
			
		||||
                            unsigned char *dest, int pitch, int stride)
 | 
			
		||||
{
 | 
			
		||||
    // output needs to be at least pitch * 4 for vp8_short_idct4x4llm_c to work properly
 | 
			
		||||
    short output[16*4];
 | 
			
		||||
    short output[16];
 | 
			
		||||
    short *diff_ptr = output;
 | 
			
		||||
    int r, c;
 | 
			
		||||
    int i;
 | 
			
		||||
@@ -45,7 +45,8 @@ void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, unsign
 | 
			
		||||
        input[i] = dq[i] * input[i];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    vp8_short_idct4x4llm_c(input, output, pitch*2);
 | 
			
		||||
    // the idct halves ( >> 1) the pitch
 | 
			
		||||
    vp8_short_idct4x4llm_c(input, output, 4 << 1);
 | 
			
		||||
 | 
			
		||||
    vpx_memset(input, 0, 32);
 | 
			
		||||
 | 
			
		||||
@@ -65,16 +66,17 @@ void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, unsign
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        dest += stride;
 | 
			
		||||
        diff_ptr += pitch;
 | 
			
		||||
        diff_ptr += 4;
 | 
			
		||||
        pred += pitch;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc)
 | 
			
		||||
void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
 | 
			
		||||
                               unsigned char *dest, int pitch, int stride,
 | 
			
		||||
                               int Dc)
 | 
			
		||||
{
 | 
			
		||||
    int i;
 | 
			
		||||
    // output needs to be at least pitch * 4 for vp8_short_idct4x4llm_c to work properly
 | 
			
		||||
    short output[16*4];
 | 
			
		||||
    short output[16];
 | 
			
		||||
    short *diff_ptr = output;
 | 
			
		||||
    int r, c;
 | 
			
		||||
 | 
			
		||||
@@ -85,7 +87,8 @@ void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, uns
 | 
			
		||||
        input[i] = dq[i] * input[i];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    vp8_short_idct4x4llm_c(input, output, pitch*2);
 | 
			
		||||
    // the idct halves ( >> 1) the pitch
 | 
			
		||||
    vp8_short_idct4x4llm_c(input, output, 4 << 1);
 | 
			
		||||
 | 
			
		||||
    vpx_memset(input, 0, 32);
 | 
			
		||||
 | 
			
		||||
@@ -105,7 +108,7 @@ void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, uns
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        dest += stride;
 | 
			
		||||
        diff_ptr += pitch;
 | 
			
		||||
        diff_ptr += 4;
 | 
			
		||||
        pred += pitch;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -21,7 +21,7 @@
 | 
			
		||||
             unsigned char *pred, unsigned char *output, \
 | 
			
		||||
             int pitch, int stride)
 | 
			
		||||
 | 
			
		||||
#define prototype_dequant_idct_dc_add(sym) \
 | 
			
		||||
#define prototype_dequant_dc_idct_add(sym) \
 | 
			
		||||
    void sym(short *input, short *dq, \
 | 
			
		||||
             unsigned char *pred, unsigned char *output, \
 | 
			
		||||
             int pitch, int stride, \
 | 
			
		||||
@@ -45,21 +45,21 @@ extern prototype_dequant_block(vp8_dequant_block);
 | 
			
		||||
#endif
 | 
			
		||||
extern prototype_dequant_idct_add(vp8_dequant_idct_add);
 | 
			
		||||
 | 
			
		||||
#ifndef vp8_dequant_idct_dc_add
 | 
			
		||||
#define vp8_dequant_idct_dc_add vp8_dequant_dc_idct_add_c
 | 
			
		||||
#ifndef vp8_dequant_dc_idct_add
 | 
			
		||||
#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_c
 | 
			
		||||
#endif
 | 
			
		||||
extern prototype_dequant_idct_dc_add(vp8_dequant_idct_dc_add);
 | 
			
		||||
extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add);
 | 
			
		||||
 | 
			
		||||
typedef prototype_dequant_block((*vp8_dequant_block_fn_t));
 | 
			
		||||
 | 
			
		||||
typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t));
 | 
			
		||||
typedef prototype_dequant_idct_dc_add((*vp8_dequant_idct_dc_add_fn_t));
 | 
			
		||||
typedef prototype_dequant_dc_idct_add((*vp8_dequant_dc_idct_add_fn_t));
 | 
			
		||||
 | 
			
		||||
typedef struct
 | 
			
		||||
{
 | 
			
		||||
    vp8_dequant_block_fn_t        block;
 | 
			
		||||
    vp8_dequant_idct_add_fn_t     idct_add;
 | 
			
		||||
    vp8_dequant_idct_dc_add_fn_t  idct_dc_add;
 | 
			
		||||
    vp8_dequant_dc_idct_add_fn_t  dc_idct_add;
 | 
			
		||||
} vp8_dequant_rtcd_vtable_t;
 | 
			
		||||
 | 
			
		||||
#if CONFIG_RUNTIME_CPU_DETECT
 | 
			
		||||
 
 | 
			
		||||
@@ -22,7 +22,7 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
 | 
			
		||||
    pbi->mb.rtcd         = &pbi->common.rtcd;
 | 
			
		||||
    pbi->dequant.block   = vp8_dequantize_b_c;
 | 
			
		||||
    pbi->dequant.idct_add    = vp8_dequant_idct_add_c;
 | 
			
		||||
    pbi->dequant.idct_dc_add    = vp8_dequant_dc_idct_add_c;
 | 
			
		||||
    pbi->dequant.dc_idct_add    = vp8_dequant_dc_idct_add_c;
 | 
			
		||||
    pbi->dboolhuff.start = vp8dx_start_decode_c;
 | 
			
		||||
    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
 | 
			
		||||
#if 0 //For use with RTCD, when implemented
 | 
			
		||||
 
 | 
			
		||||
@@ -22,7 +22,7 @@
 | 
			
		||||
#if HAVE_MMX
 | 
			
		||||
extern prototype_dequant_block(vp8_dequantize_b_mmx);
 | 
			
		||||
extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx);
 | 
			
		||||
extern prototype_dequant_idct_dc_add(vp8_dequant_dc_idct_add_mmx);
 | 
			
		||||
extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_mmx);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if !CONFIG_RUNTIME_CPU_DETECT
 | 
			
		||||
@@ -30,10 +30,10 @@ extern prototype_dequant_idct_dc_add(vp8_dequant_dc_idct_add_mmx);
 | 
			
		||||
#define vp8_dequant_block vp8_dequantize_b_mmx
 | 
			
		||||
 | 
			
		||||
#undef  vp8_dequant_idct_add
 | 
			
		||||
#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
 | 
			
		||||
#define vp8_dequant_idct_add vp8_dequant_idct_mmx
 | 
			
		||||
 | 
			
		||||
#undef  vp8_dequant_idct_dc
 | 
			
		||||
#define vp8_dequant_idct_add_dc vp8_dequant_dc_idct_add_mmx
 | 
			
		||||
#undef  vp8_dequant_dc_idct_add
 | 
			
		||||
#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_mmx
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -44,7 +44,7 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
 | 
			
		||||
    {
 | 
			
		||||
        pbi->dequant.block   = vp8_dequantize_b_mmx;
 | 
			
		||||
        pbi->dequant.idct_add    = vp8_dequant_idct_add_mmx;
 | 
			
		||||
        pbi->dequant.idct_dc_add = vp8_dequant_dc_idct_add_mmx;
 | 
			
		||||
        pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_mmx;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -125,6 +125,7 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/systemdependent.c
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/vpx_asm_offsets.c
 | 
			
		||||
 | 
			
		||||
VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/filter_c.c
 | 
			
		||||
VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/idctllm.c
 | 
			
		||||
VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/recon.c
 | 
			
		||||
VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/reconintra4x4.c
 | 
			
		||||
VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/generic/systemdependent.c
 | 
			
		||||
@@ -134,6 +135,7 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/bilinearfilter_v6$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x4_v6$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x8_v6$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem16x16_v6$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/dc_only_idct_add_v6$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/iwalsh_v6$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/filter_v6$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/idct_v6$(ASM)
 | 
			
		||||
@@ -150,6 +152,7 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/bilinearpredict16x16_neon$(ASM
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem8x4_neon$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem8x8_neon$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem16x16_neon$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/dc_only_idct_add_neon$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/iwalsh_neon$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
 | 
			
		||||
VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
 | 
			
		||||
 
 | 
			
		||||
@@ -23,12 +23,12 @@ VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/generic/dsystemdependent.c
 | 
			
		||||
 | 
			
		||||
#File list for armv6
 | 
			
		||||
# decoder
 | 
			
		||||
VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantdcidct_v6$(ASM)
 | 
			
		||||
VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantidct_v6$(ASM)
 | 
			
		||||
VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_dc_idct_v6$(ASM)
 | 
			
		||||
VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_idct_v6$(ASM)
 | 
			
		||||
VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantize_v6$(ASM)
 | 
			
		||||
 | 
			
		||||
#File list for neon
 | 
			
		||||
# decoder
 | 
			
		||||
VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantdcidct_neon$(ASM)
 | 
			
		||||
VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantidct_neon$(ASM)
 | 
			
		||||
VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_dc_idct_neon$(ASM)
 | 
			
		||||
VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_idct_neon$(ASM)
 | 
			
		||||
VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantizeb_neon$(ASM)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user