PPC asm for AV_RL*()
PPC is normally big-endian but has special little-endian load/store instructions. Using these avoids a separate byteswap. This makes the vorbis decoder about 5% faster. Not much else uses little-endian read/write extensively. GCC generates horrible PPC code for the default AV_[RW]B64 (which uses a packed struct), so we override it with a plain pointer cast. Originally committed as revision 18602 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
3c55ce039d
commit
9f5ff83f2a
3
configure
vendored
3
configure
vendored
@ -890,6 +890,7 @@ HAVE_LIST="
|
|||||||
getrusage
|
getrusage
|
||||||
inet_aton
|
inet_aton
|
||||||
inline_asm
|
inline_asm
|
||||||
|
ldbrx
|
||||||
libdc1394_1
|
libdc1394_1
|
||||||
libdc1394_2
|
libdc1394_2
|
||||||
llrint
|
llrint
|
||||||
@ -1807,7 +1808,7 @@ if test $cpu != "generic"; then
|
|||||||
Cell|CELL|cell)
|
Cell|CELL|cell)
|
||||||
add_cflags -mcpu=cell
|
add_cflags -mcpu=cell
|
||||||
warn_altivec disabled Cell
|
warn_altivec disabled Cell
|
||||||
enable ppc64
|
enable ppc64 ldbrx
|
||||||
;;
|
;;
|
||||||
# targets that do NOT support conditional mov (cmov)
|
# targets that do NOT support conditional mov (cmov)
|
||||||
i[345]86|pentium|pentium-mmx|k6|k6-[23]|winchip-c6|winchip2|c3)
|
i[345]86|pentium|pentium-mmx|k6|k6-[23]|winchip-c6|winchip2|c3)
|
||||||
|
@ -31,6 +31,8 @@
|
|||||||
|
|
||||||
#if ARCH_ARM
|
#if ARCH_ARM
|
||||||
# include "arm/intreadwrite.h"
|
# include "arm/intreadwrite.h"
|
||||||
|
#elif ARCH_PPC
|
||||||
|
# include "ppc/intreadwrite.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
104
libavutil/ppc/intreadwrite.h
Normal file
104
libavutil/ppc/intreadwrite.h
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVUTIL_PPC_INTREADWRITE_H
|
||||||
|
#define AVUTIL_PPC_INTREADWRITE_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#define AV_RL16 AV_RL16
|
||||||
|
/*
 * Read a 16-bit little-endian value from p.
 *
 * Uses a single lhbrx (byte-reversed halfword load) so that no separate
 * byteswap is needed. The "Z" constraint together with the %y operand
 * modifier hands the memory operand to the instruction; the result is
 * returned in v.
 */
static inline uint16_t AV_RL16(const void *p)
|
||||||
|
{
|
||||||
|
uint16_t v;
|
||||||
|
__asm__ ("lhbrx %0, %y1" : "=r"(v) : "Z"(*(const uint16_t*)p));
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define AV_WL16 AV_WL16
|
||||||
|
/*
 * Write the 16-bit value v to p in little-endian byte order.
 *
 * Uses a single sthbrx (byte-reversed halfword store). The destination is
 * declared as a memory output ("=Z") so the compiler knows *p is written.
 */
static inline void AV_WL16(void *p, uint16_t v)
|
||||||
|
{
|
||||||
|
__asm__ ("sthbrx %1, %y0" : "=Z"(*(uint16_t*)p) : "r"(v));
|
||||||
|
}
|
||||||
|
|
||||||
|
#define AV_RL32 AV_RL32
|
||||||
|
/*
 * Read a 32-bit little-endian value from p.
 *
 * Uses a single lwbrx (byte-reversed word load) so that no separate
 * byteswap is needed; the loaded value is returned in v.
 */
static inline uint32_t AV_RL32(const void *p)
|
||||||
|
{
|
||||||
|
uint32_t v;
|
||||||
|
__asm__ ("lwbrx %0, %y1" : "=r"(v) : "Z"(*(const uint32_t*)p));
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define AV_WL32 AV_WL32
|
||||||
|
/*
 * Write the 32-bit value v to p in little-endian byte order.
 *
 * Uses a single stwbrx (byte-reversed word store). The destination is
 * declared as a memory output ("=Z") so the compiler knows *p is written.
 */
static inline void AV_WL32(void *p, uint32_t v)
|
||||||
|
{
|
||||||
|
__asm__ ("stwbrx %1, %y0" : "=Z"(*(uint32_t*)p) : "r"(v));
|
||||||
|
}
|
||||||
|
|
||||||
|
#if HAVE_LDBRX
|
||||||
|
|
||||||
|
#define AV_RL64 AV_RL64
|
||||||
|
/*
 * Read a 64-bit little-endian value from p (HAVE_LDBRX variant).
 *
 * Uses a single ldbrx (byte-reversed doubleword load); this branch is only
 * compiled when configure has enabled ldbrx for the target CPU.
 */
static inline uint64_t AV_RL64(const void *p)
|
||||||
|
{
|
||||||
|
uint64_t v;
|
||||||
|
__asm__ ("ldbrx %0, %y1" : "=r"(v) : "Z"(*(const uint64_t*)p));
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define AV_WL64 AV_WL64
|
||||||
|
/*
 * Write the 64-bit value v to p in little-endian byte order
 * (HAVE_LDBRX variant).
 *
 * Uses a single stdbrx (byte-reversed doubleword store). The destination is
 * declared as a memory output ("=Z") so the compiler knows *p is written.
 */
static inline void AV_WL64(void *p, uint64_t v)
|
||||||
|
{
|
||||||
|
__asm__ ("stdbrx %1, %y0" : "=Z"(*(uint64_t*)p) : "r"(v));
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define AV_RL64 AV_RL64
|
||||||
|
static inline uint64_t AV_RL64(const void *p)
|
||||||
|
{
|
||||||
|
union { uint64_t v; uint32_t hl[2]; } v;
|
||||||
|
__asm__ ("lwbrx %0, %y2 \n\t"
|
||||||
|
"lwbrx %1, %y3 \n\t"
|
||||||
|
: "=r"(v.hl[1]), "=r"(v.hl[0])
|
||||||
|
: "Z"(*(const uint32_t*)p), "Z"(*((const uint32_t*)p+1)));
|
||||||
|
return v.v;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define AV_WL64 AV_WL64
|
||||||
|
/*
 * Write the 64-bit value v to p in little-endian byte order without stdbrx.
 *
 * v is split through a union into two 32-bit halves, which are written with
 * two byte-reversed word stores (stwbrx): vv.hl[1] goes to p and vv.hl[0]
 * goes to p+4. Both destinations are declared as memory outputs ("=Z") so
 * the compiler knows all 8 bytes are written.
 */
static inline void AV_WL64(void *p, uint64_t v)
|
||||||
|
{
|
||||||
|
union { uint64_t v; uint32_t hl[2]; } vv = { v };
|
||||||
|
__asm__ ("stwbrx %2, %y0 \n\t"
|
||||||
|
"stwbrx %3, %y1 \n\t"
|
||||||
|
: "=Z"(*(uint32_t*)p), "=Z"(*((uint32_t*)p+1))
|
||||||
|
: "r"(vv.hl[1]), "r"(vv.hl[0]));
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* HAVE_LDBRX */
|
||||||
|
|
||||||
|
/*
 * Big-endian 64-bit read/write overrides.
 *
 * GCC fails miserably on the packed struct version which is used by
 * default, so we override it here with a plain pointer cast: AV_RB64
 * dereferences p as a const uint64_t, AV_WB64 assigns v through p as a
 * uint64_t. No byte swapping is performed.
 *
 * NOTE(review): the direct cast presumably relies on the target being
 * big-endian and tolerating unaligned 64-bit accesses, and it bypasses
 * the packed-struct aliasing workaround -- confirm for every PPC target
 * this header is built for.
 */
|
||||||
|
|
||||||
|
#define AV_RB64(p) (*(const uint64_t *)(p))
|
||||||
|
#define AV_WB64(p, v) (*(uint64_t *)(p) = (v))
|
||||||
|
|
||||||
|
#endif /* AVUTIL_PPC_INTREADWRITE_H */
|
Loading…
Reference in New Issue
Block a user