ARM: make some NEON macros reusable
Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
parent
150ddbc148
commit
a760f530bb
@ -19,46 +19,7 @@
|
||||
*/
|
||||
|
||||
#include "asm.S"
|
||||
|
||||
/*
 * transpose_8x8 r0 r1 r2 r3 r4 r5 r6 r7
 *
 * Applies VTRN to the eight vector registers given as arguments, in three
 * passes of decreasing element size:
 *   32-bit on register pairs four apart  (r0/r4, r1/r5, r2/r6, r3/r7)
 *   16-bit on register pairs two apart   (r0/r2, r1/r3, r4/r6, r5/r7)
 *    8-bit on adjacent register pairs    (r0/r1, r2/r3, r4/r5, r6/r7)
 * All eight arguments are rewritten in place; the macro names no other
 * register.
 * NOTE(review): this looks like the usual three-stage transpose of an 8x8
 * block of bytes held one row per register; confirm against the callers.
 */
.macro transpose_8x8 r0 r1 r2 r3 r4 r5 r6 r7
|
||||
vtrn.32 \r0, \r4 @ pass 1: 32-bit elements
|
||||
vtrn.32 \r1, \r5
|
||||
vtrn.32 \r2, \r6
|
||||
vtrn.32 \r3, \r7
|
||||
vtrn.16 \r0, \r2 @ pass 2: 16-bit elements
|
||||
vtrn.16 \r1, \r3
|
||||
vtrn.16 \r4, \r6
|
||||
vtrn.16 \r5, \r7
|
||||
vtrn.8 \r0, \r1 @ pass 3: 8-bit elements
|
||||
vtrn.8 \r2, \r3
|
||||
vtrn.8 \r4, \r5
|
||||
vtrn.8 \r6, \r7
|
||||
.endm
|
||||
|
||||
/*
 * transpose_4x4 r0 r1 r2 r3
 *
 * Applies VTRN to the four vector registers given as arguments, in two
 * passes: 16-bit elements on r0/r2 and r1/r3, then 8-bit elements on
 * r0/r1 and r2/r3. All four arguments are rewritten in place.
 * NOTE(review): presumably a transpose of 4x4 blocks of bytes, one row per
 * register; confirm against the callers.
 */
.macro transpose_4x4 r0 r1 r2 r3
|
||||
vtrn.16 \r0, \r2 @ pass 1: 16-bit elements
|
||||
vtrn.16 \r1, \r3
|
||||
vtrn.8 \r0, \r1 @ pass 2: 8-bit elements
|
||||
vtrn.8 \r2, \r3
|
||||
.endm
|
||||
|
||||
/*
 * swap4 r0 r1 r2 r3 r4 r5 r6 r7
 *
 * Exchanges the contents of four register pairs with VSWP:
 * r0 with r4, r1 with r5, r2 with r6 and r3 with r7.
 * All eight arguments are modified; the macro names no other register.
 */
.macro swap4 r0 r1 r2 r3 r4 r5 r6 r7
|
||||
vswp \r0, \r4
|
||||
vswp \r1, \r5
|
||||
vswp \r2, \r6
|
||||
vswp \r3, \r7
|
||||
.endm
|
||||
|
||||
/*
 * transpose16_4x4 r0 r1 r2 r3 r4 r5 r6 r7
 *
 * Applies VTRN in two passes: 32-bit elements on r0/r2, r1/r3, r4/r6 and
 * r5/r7, then 16-bit elements on r0/r1, r2/r3, r4/r5 and r6/r7.
 * No instruction pairs a register from r0-r3 with one from r4-r7, so the
 * two groups of four are processed independently of each other.
 * All eight arguments are rewritten in place.
 * NOTE(review): presumably a transpose of 4x4 blocks of 16-bit values, one
 * row per register, done for two blocks at once; confirm against callers.
 */
.macro transpose16_4x4 r0 r1 r2 r3 r4 r5 r6 r7
|
||||
vtrn.32 \r0, \r2 @ pass 1: 32-bit elements
|
||||
vtrn.32 \r1, \r3
|
||||
vtrn.32 \r4, \r6
|
||||
vtrn.32 \r5, \r7
|
||||
vtrn.16 \r0, \r1 @ pass 2: 16-bit elements
|
||||
vtrn.16 \r2, \r3
|
||||
vtrn.16 \r4, \r5
|
||||
vtrn.16 \r6, \r7
|
||||
.endm
|
||||
#include "neon.S"
|
||||
|
||||
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
|
||||
.macro h264_chroma_mc8 type
|
||||
|
59
libavcodec/arm/neon.S
Normal file
59
libavcodec/arm/neon.S
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
 * transpose_8x8 r0, r1, r2, r3, r4, r5, r6, r7
 *
 * Runs three passes of VTRN over the eight vector registers passed in,
 * halving the element size each time while halving the distance between
 * the paired registers:
 *   vtrn.32  r0/r4  r1/r5  r2/r6  r3/r7
 *   vtrn.16  r0/r2  r1/r3  r4/r6  r5/r7
 *   vtrn.8   r0/r1  r2/r3  r4/r5  r6/r7
 * Every argument is both input and output; nothing else is clobbered by
 * the macro itself.
 * NOTE(review): the sequence matches the standard transpose of an 8x8
 * byte block stored one row per register. When given q registers each
 * 64-bit half would be transposed as its own 8x8 block; verify this
 * against the ARM VTRN description before relying on it.
 */
.macro transpose_8x8 r0, r1, r2, r3, r4, r5, r6, r7
|
||||
vtrn.32 \r0, \r4 @ pass 1: 32-bit elements, registers 4 apart
|
||||
vtrn.32 \r1, \r5
|
||||
vtrn.32 \r2, \r6
|
||||
vtrn.32 \r3, \r7
|
||||
vtrn.16 \r0, \r2 @ pass 2: 16-bit elements, registers 2 apart
|
||||
vtrn.16 \r1, \r3
|
||||
vtrn.16 \r4, \r6
|
||||
vtrn.16 \r5, \r7
|
||||
vtrn.8 \r0, \r1 @ pass 3: 8-bit elements, adjacent registers
|
||||
vtrn.8 \r2, \r3
|
||||
vtrn.8 \r4, \r5
|
||||
vtrn.8 \r6, \r7
|
||||
.endm
|
||||
|
||||
/*
 * transpose_4x4 r0, r1, r2, r3
 *
 * Runs two passes of VTRN over the four vector registers passed in:
 *   vtrn.16  r0/r2  r1/r3
 *   vtrn.8   r0/r1  r2/r3
 * Every argument is both input and output.
 * NOTE(review): this appears to transpose 4x4 blocks of bytes held one
 * row per register (one block per 32 bits of register width); verify
 * against the callers.
 */
.macro transpose_4x4 r0, r1, r2, r3
|
||||
vtrn.16 \r0, \r2 @ pass 1: 16-bit elements, registers 2 apart
|
||||
vtrn.16 \r1, \r3
|
||||
vtrn.8 \r0, \r1 @ pass 2: 8-bit elements, adjacent registers
|
||||
vtrn.8 \r2, \r3
|
||||
.endm
|
||||
|
||||
/*
 * swap4 r0, r1, r2, r3, r4, r5, r6, r7
 *
 * Swaps the first four register arguments with the last four, pairwise,
 * using VSWP: r0<->r4, r1<->r5, r2<->r6, r3<->r7.
 * Every argument is both input and output.
 */
.macro swap4 r0, r1, r2, r3, r4, r5, r6, r7
|
||||
vswp \r0, \r4
|
||||
vswp \r1, \r5
|
||||
vswp \r2, \r6
|
||||
vswp \r3, \r7
|
||||
.endm
|
||||
|
||||
/*
 * transpose16_4x4 r0, r1, r2, r3, r4, r5, r6, r7
 *
 * Runs two passes of VTRN over the eight vector registers passed in:
 *   vtrn.32  r0/r2  r1/r3  r4/r6  r5/r7
 *   vtrn.16  r0/r1  r2/r3  r4/r5  r6/r7
 * Registers r0-r3 are only ever paired with each other, and likewise
 * r4-r7, so the macro handles two separate groups of four registers.
 * Every argument is both input and output.
 * NOTE(review): this appears to transpose 4x4 blocks of 16-bit values,
 * one row per register, for two register groups at once; verify against
 * the callers.
 */
.macro transpose16_4x4 r0, r1, r2, r3, r4, r5, r6, r7
|
||||
vtrn.32 \r0, \r2 @ pass 1: 32-bit elements, registers 2 apart
|
||||
vtrn.32 \r1, \r3
|
||||
vtrn.32 \r4, \r6
|
||||
vtrn.32 \r5, \r7
|
||||
vtrn.16 \r0, \r1 @ pass 2: 16-bit elements, adjacent registers
|
||||
vtrn.16 \r2, \r3
|
||||
vtrn.16 \r4, \r5
|
||||
vtrn.16 \r6, \r7
|
||||
.endm
|
@ -22,6 +22,7 @@
|
||||
*/
|
||||
|
||||
#include "asm.S"
|
||||
#include "neon.S"
|
||||
|
||||
function ff_vp8_luma_dc_wht_neon, export=1
|
||||
vld1.16 {q0-q1}, [r1,:128]
|
||||
@ -454,23 +455,6 @@ endfunc
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/*
 * transpose8x16matrix
 *
 * Takes no arguments: always operates on q0-q7 and rewrites all eight in
 * place. Runs three passes of VTRN (32-bit, 16-bit, 8-bit elements) with
 * the paired registers four, two and one apart respectively.
 * The instruction sequence is the same one transpose_8x8 expands to when
 * invoked as "transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7".
 * NOTE(review): going by the name this transposes an 8x16 byte matrix
 * held in q0-q7; confirm the row/column layout against the loads and
 * stores in the callers.
 */
.macro transpose8x16matrix
|
||||
vtrn.32 q0, q4 @ pass 1: 32-bit elements
|
||||
vtrn.32 q1, q5
|
||||
vtrn.32 q2, q6
|
||||
vtrn.32 q3, q7
|
||||
|
||||
vtrn.16 q0, q2 @ pass 2: 16-bit elements
|
||||
vtrn.16 q1, q3
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q5, q7
|
||||
|
||||
vtrn.8 q0, q1 @ pass 3: 8-bit elements
|
||||
vtrn.8 q2, q3
|
||||
vtrn.8 q4, q5
|
||||
vtrn.8 q6, q7
|
||||
.endm
|
||||
|
||||
.macro vp8_v_loop_filter16 name, inner=0, simple=0
|
||||
function ff_vp8_v_loop_filter16\name\()_neon, export=1
|
||||
vpush {q4-q7}
|
||||
@ -605,7 +589,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
|
||||
vld1.8 {d13}, [r0], r1
|
||||
vld1.8 {d15}, [r0], r1
|
||||
|
||||
transpose8x16matrix
|
||||
transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
|
||||
|
||||
vdup.8 q14, r2 @ flim_E
|
||||
.if !\simple
|
||||
@ -616,7 +600,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
|
||||
|
||||
sub r0, r0, r1, lsl #4 @ backup 16 rows
|
||||
|
||||
transpose8x16matrix
|
||||
transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
|
||||
|
||||
@ Store pixels:
|
||||
vst1.8 {d0}, [r0], r1
|
||||
@ -670,7 +654,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
|
||||
vld1.8 {d14}, [r0], r2
|
||||
vld1.8 {d15}, [r1], r2
|
||||
|
||||
transpose8x16matrix
|
||||
transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
|
||||
|
||||
vdup.8 q14, r3 @ flim_E
|
||||
vdup.8 q15, r12 @ flim_I
|
||||
@ -681,7 +665,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
|
||||
sub r0, r0, r2, lsl #3 @ backup u 8 rows
|
||||
sub r1, r1, r2, lsl #3 @ backup v 8 rows
|
||||
|
||||
transpose8x16matrix
|
||||
transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
|
||||
|
||||
@ Store pixels:
|
||||
vst1.8 {d0}, [r0], r2
|
||||
|
Loading…
x
Reference in New Issue
Block a user