f9bb4bdffc
on ev6. Originally committed as revision 979 to svn://svn.ffmpeg.org/ffmpeg/trunk
187 lines
6.2 KiB
ArmAsm
187 lines
6.2 KiB
ArmAsm
/*
|
|
* Alpha optimized DSP utils
|
|
* Copyright (c) 2002 Falk Hueffner <falk@debian.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*/
|
|
|
|
#include "regdef.h"
|
|
#ifdef HAVE_AV_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
/* Some nicer register names: ta..td name t10, t11, t12 and AT so they can
   be used as extra temporaries next to t0..t9. */
|
|
#define ta t10
|
|
#define tb t11
|
|
#define tc t12
|
|
#define td AT
|
|
/* Danger: these overlap with the argument list and the return value */
/* te and tf (a5, a4) are used as temporaries by the $aligned loop of
   pix_abs16x16_mvi_asm.  tg is a3, which that function uses as its line
   counter, and th is v0, which it uses as its running sum; neither tg nor
   th is referenced in the code visible in this file. */
|
|
#define te a5
|
|
#define tf a4
|
|
#define tg a3
|
|
#define th v0
|
|
|
|
/* td is AT, so the code below names AT explicitly.
   NOTE(review): .set noat is presumably what allows that without assembler
   complaints -- confirm in the GNU as Alpha documentation. */
.set noat
|
|
/* NOTE(review): presumably keeps the assembler from reordering the
   hand-scheduled instruction sequences below -- confirm. */
.set noreorder
|
|
/* Target CPU selection; the function header below states the code is
   written with a pca56 in mind. */
.arch pca56
|
|
.text
|
|
|
|
/*****************************************************************************
|
|
* int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
|
|
*
* Processes 16 lines.  For every line, two quadwords (16 bytes) are read
* from pix1 and from pix2, each pair is fed to perr, and both perr results
* are added to v0.  pix1 and pix2 are advanced by line_size after every
* line.  The accumulated sum is left in v0 when the function returns.
*
* Register use, as read from the code below:
*   a0  pix1, always read with ldq
*   a1  pix2; (a1 & 7) selects the $aligned or the $unaligned loop
*   a2  line_size
*   a3  lines still to process, initialised to 16
*   v0  running sum
*   t0..t9, ta..td (and te, tf in the $aligned loop) are temporaries
*
* NOTE(review): the alignment of pix2 is tested only once, before the
* loops, and the $unaligned loop hands the already advanced a1 to
* extql/extqh.  line_size is therefore presumably expected to be a
* multiple of 8, and pix1 to be quadword aligned -- confirm against the
* callers.
|
|
*
|
|
* This code is written with a pca56 in mind. For ev6, one should
|
|
* really take the increased latency of 3 cycles for MVI instructions
|
|
* into account.
|
|
*
|
|
* It is important to keep the loading and first use of a register as
|
|
* far apart as possible, because if a register is accessed before it
|
|
* has been fetched from memory, the CPU will stall.
|
|
*/
|
|
.align 4
|
|
.globl pix_abs16x16_mvi_asm
|
|
.ent pix_abs16x16_mvi_asm
|
|
pix_abs16x16_mvi_asm:
|
|
.frame sp, 0, ra, 0
|
|
.prologue 0
|
|
|
|
/* Profiling hook: only assembled when HAVE_GPROF is defined; calls _mcount
   through AT. */
#ifdef HAVE_GPROF
|
|
lda AT, _mcount
|
|
jsr AT, (AT), _mcount
|
|
#endif
|
|
|
|
/* t0 = low three bits of pix2; zero means pix2 can be read with plain ldq,
   so branch to $aligned.  Meanwhile clear the sum and set the line count. */
and a1, 7, t0
|
|
clr v0
|
|
lda a3, 16
|
|
beq t0, $aligned
|
|
.align 4
|
|
/* Unaligned pix2: two lines per iteration.  Each pix2 line is fetched as
   three ldq_u quadwords (left_u, mid, right_u) which are then recombined
   with extql/extqh + or into the two quadwords compared against pix1. */
$unaligned:
|
|
/* Registers:
|
|
line 0:
|
|
t0: left_u -> left lo -> left
|
|
t1: mid
|
|
t2: right_u -> right hi -> right
|
|
t3: ref left
|
|
t4: ref right
|
|
line 1:
|
|
t5: left_u -> left lo -> left
|
|
t6: mid
|
|
t7: right_u -> right hi -> right
|
|
t8: ref left
|
|
t9: ref right
|
|
temp:
|
|
ta: left hi
|
|
tb: right lo
|
|
tc: error left
|
|
td: error right */
|
|
|
|
/* load line 0 */
|
|
ldq_u t0, 0(a1) # left_u
|
|
ldq_u t1, 8(a1) # mid
|
|
ldq_u t2, 16(a1) # right_u
|
|
ldq t3, 0(a0) # ref left
|
|
ldq t4, 8(a0) # ref right
|
|
addq a0, a2, a0 # pix1
|
|
addq a1, a2, a1 # pix2
|
|
/* load line 1 */
|
|
ldq_u t5, 0(a1) # left_u
|
|
ldq_u t6, 8(a1) # mid
|
|
ldq_u t7, 16(a1) # right_u
|
|
ldq t8, 0(a0) # ref left
|
|
ldq t9, 8(a0) # ref right
|
|
addq a0, a2, a0 # pix1
|
|
addq a1, a2, a1 # pix2
|
|
/* calc line 0 */
/* a1 has already been advanced by two lines at this point; it is passed to
   extql/extqh as the second operand for both line 0 and line 1. */
|
|
extql t0, a1, t0 # left lo
|
|
extqh t1, a1, ta # left hi
|
|
extql t1, a1, tb # right lo
|
|
or t0, ta, t0 # left
|
|
extqh t2, a1, t2 # right hi
|
|
perr t3, t0, tc # error left
|
|
or t2, tb, t2 # right
|
|
perr t4, t2, td # error right
|
|
addq v0, tc, v0 # add error left
|
|
addq v0, td, v0 # add error right
|
|
/* calc line 1 */
|
|
extql t5, a1, t5 # left lo
|
|
extqh t6, a1, ta # left hi
|
|
extql t6, a1, tb # right lo
|
|
or t5, ta, t5 # left
|
|
extqh t7, a1, t7 # right hi
|
|
perr t8, t5, tc # error left
|
|
or t7, tb, t7 # right
|
|
perr t9, t7, td # error right
|
|
addq v0, tc, v0 # add error left
|
|
addq v0, td, v0 # add error right
|
|
/* loop */
|
|
subq a3, 2, a3 # h -= 2
|
|
bne a3, $unaligned
|
|
ret
|
|
|
|
.align 4
|
|
/* Aligned pix2: four lines per iteration, all sixteen quadwords are loaded
   before the first perr.  Besides t0..t9 and ta..td this loop also uses
   te and tf (a5, a4) as load targets. */
$aligned:
|
|
/* load line 0 */
|
|
ldq t0, 0(a1) # left
|
|
ldq t1, 8(a1) # right
|
|
addq a1, a2, a1 # pix2
|
|
ldq t2, 0(a0) # ref left
|
|
ldq t3, 8(a0) # ref right
|
|
addq a0, a2, a0 # pix1
|
|
/* load line 1 */
|
|
ldq t4, 0(a1) # left
|
|
ldq t5, 8(a1) # right
|
|
addq a1, a2, a1 # pix2
|
|
ldq t6, 0(a0) # ref left
|
|
ldq t7, 8(a0) # ref right
|
|
addq a0, a2, a0 # pix1
|
|
/* load line 2 */
|
|
ldq t8, 0(a1) # left
|
|
ldq t9, 8(a1) # right
|
|
addq a1, a2, a1 # pix2
|
|
ldq ta, 0(a0) # ref left
|
|
ldq tb, 8(a0) # ref right
|
|
addq a0, a2, a0 # pix1
|
|
/* load line 3 */
|
|
ldq tc, 0(a1) # left
|
|
ldq td, 8(a1) # right
|
|
addq a1, a2, a1 # pix2
|
|
ldq te, 0(a0) # ref left
|
|
ldq tf, 8(a0) # ref right
|
|
/* calc line 0 */
/* From here on t0 and t1 hold the left/right perr results of the line being
   summed; the pix1 advance for line 3 is issued between the first two perr
   instructions. */
|
|
perr t0, t2, t0 # error left
|
|
addq a0, a2, a0 # pix1
|
|
perr t1, t3, t1 # error right
|
|
addq v0, t0, v0 # add error left
|
|
/* calc line 1 */
|
|
perr t4, t6, t0 # error left
|
|
addq v0, t1, v0 # add error right
|
|
perr t5, t7, t1 # error right
|
|
addq v0, t0, v0 # add error left
|
|
/* calc line 2 */
|
|
perr t8, ta, t0 # error left
|
|
addq v0, t1, v0 # add error right
|
|
perr t9, tb, t1 # error right
|
|
addq v0, t0, v0 # add error left
|
|
/* calc line 3 */
|
|
perr tc, te, t0 # error left
|
|
addq v0, t1, v0 # add error right
|
|
perr td, tf, t1 # error right
|
|
addq v0, t0, v0 # add error left
|
|
addq v0, t1, v0 # add error right
|
|
/* loop */
|
|
subq a3, 4, a3 # h -= 4
|
|
bne a3, $aligned
|
|
ret
|
|
.end pix_abs16x16_mvi_asm
|