2009-03-03 19:28:35 -08:00
|
|
|
/* $OpenBSD: swab.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
|
|
|
|
/*
|
|
|
|
* Written by J.T. Conklin <jtc@netbsd.org>.
|
|
|
|
* Public domain.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <machine/asm.h>
|
|
|
|
|
|
|
|
/*
|
|
|
|
* On the i486, this code is negligibly faster than the code generated
|
|
|
|
* by gcc at about half the size. If my i386 databook is correct, it
|
|
|
|
* should be considerably faster than the gcc code on an i386.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * swab(src, dst, len)
 *
 * Copy len bytes from src to dst, exchanging the two bytes of every
 * 16-bit word on the way.  Only complete byte pairs are processed: when
 * len is odd the final byte is neither read nor written.  When len is
 * less than 2 -- including any negative value -- nothing is copied.
 *
 * Arguments are taken from the stack, in order: src, dst, len.
 * (Presumably the POSIX prototype
 *  void swab(const void *, void *, ssize_t); confirm against the
 *  declaring header.)
 *
 * Registers: %esi = read pointer, %edi = write pointer,
 *            %ecx = remaining count, %ax = word being swapped.
 * Clobbers:  %eax, %ecx, flags.  %esi and %edi are saved and restored.
 */
ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		# first argument:  source pointer
	movl	16(%esp),%edi		# second argument: destination pointer
	movl	20(%esp),%ecx		# third argument:  length in bytes

	cld				# set direction forward

	/*
	 * The length is a signed quantity.  Without this guard a negative
	 * value would be turned into a huge positive word count by the
	 * logical shift below and the loops would run far past both buffers.
	 */
	cmpl	$1,%ecx			# fewer than two bytes (or negative)?
	jle	L4			# then there is no pair to swap.

	shrl	$1,%ecx			# %ecx = number of 16-bit words (>= 1)
	testl	$7,%ecx			# copy first group of 1 to 7 words
	jz	L2			# while swapping alternate bytes.
	.align	2,0x90
L1:	lodsw				# %ax = next source word
	rorw	$8,%ax			# exchange its two bytes
	stosw				# store to destination
	decl	%ecx
	testl	$7,%ecx			# until the count is a multiple of 8
	jnz	L1

L2:	shrl	$3,%ecx			# copy remainder 8 words at a time
	jz	L4			# while swapping alternate bytes.
	.align	2,0x90
L3:	lodsw				# word 1 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 2 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 3 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 4 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 5 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 6 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 7 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 8 of 8
	rorw	$8,%ax
	stosw
	decl	%ecx			# one fewer group of 8 words to go
	jnz	L3

L4:	popl	%edi			# common exit: restore saved registers
	popl	%esi
	ret
END(swab)
|