Merge remote-tracking branch 'qatar/master'
* qatar/master:
  libxvid: remove disabled code
  qdm2: make a table static const
  qdm2: simplify bitstream reader setup for some subpacket types
  qdm2: use get_bits_left()
  build: Consistently handle conditional compilation for all optimization OBJS.
  avpacket, bfi, bgmc, rawenc: K&R prettyprinting cosmetics
  msrle: convert MS RLE decoding function to bytestream2.
  x86inc improvements for 64-bit

Conflicts:
  common.mak
  libavcodec/avpacket.c
  libavcodec/bfi.c
  libavcodec/msrledec.c
  libavcodec/qdm2.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
ca19862d38
3
Makefile
3
Makefile
@ -66,7 +66,8 @@ config.h: .config
|
|||||||
|
|
||||||
SUBDIR_VARS := CLEANFILES EXAMPLES FFLIBS HOSTPROGS TESTPROGS TOOLS \
|
SUBDIR_VARS := CLEANFILES EXAMPLES FFLIBS HOSTPROGS TESTPROGS TOOLS \
|
||||||
ARCH_HEADERS BUILT_HEADERS SKIPHEADERS \
|
ARCH_HEADERS BUILT_HEADERS SKIPHEADERS \
|
||||||
ALTIVEC-OBJS ARMV6-OBJS MMX-OBJS NEON-OBJS YASM-OBJS \
|
ALTIVEC-OBJS ARMV5TE-OBJS ARMV6-OBJS ARMVFP-OBJS MMI-OBJS \
|
||||||
|
MMX-OBJS NEON-OBJS VIS-OBJS YASM-OBJS \
|
||||||
OBJS TESTOBJS
|
OBJS TESTOBJS
|
||||||
|
|
||||||
define RESET
|
define RESET
|
||||||
|
13
arch.mak
Normal file
13
arch.mak
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
OBJS-$(HAVE_ARMV5TE) += $(ARMV5TE-OBJS) $(ARMV5TE-OBJS-yes)
|
||||||
|
OBJS-$(HAVE_ARMV6) += $(ARMV6-OBJS) $(ARMV6-OBJS-yes)
|
||||||
|
OBJS-$(HAVE_ARMVFP) += $(ARMVFP-OBJS) $(ARMVFP-OBJS-yes)
|
||||||
|
OBJS-$(HAVE_NEON) += $(NEON-OBJS) $(NEON-OBJS-yes)
|
||||||
|
|
||||||
|
OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes)
|
||||||
|
|
||||||
|
OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
|
||||||
|
|
||||||
|
OBJS-$(HAVE_VIS) += $(VIS-OBJS) $(VIS-OBJS-yes)
|
||||||
|
|
||||||
|
OBJS-$(HAVE_MMX) += $(MMX-OBJS) $(MMX-OBJS-yes)
|
||||||
|
OBJS-$(HAVE_YASM) += $(YASM-OBJS) $(YASM-OBJS-yes)
|
@ -73,7 +73,7 @@ COMPILE_S = $(call COMPILE,AS)
|
|||||||
$(OBJS):
|
$(OBJS):
|
||||||
endif
|
endif
|
||||||
|
|
||||||
OBJS-$(HAVE_MMX) += $(MMX-OBJS-yes)
|
include $(SRC_PATH)/arch.mak
|
||||||
|
|
||||||
OBJS += $(OBJS-yes)
|
OBJS += $(OBJS-yes)
|
||||||
FFLIBS := $(FFLIBS-yes) $(FFLIBS)
|
FFLIBS := $(FFLIBS-yes) $(FFLIBS)
|
||||||
|
@ -34,17 +34,10 @@
|
|||||||
|
|
||||||
typedef struct AascContext {
|
typedef struct AascContext {
|
||||||
AVCodecContext *avctx;
|
AVCodecContext *avctx;
|
||||||
|
GetByteContext gb;
|
||||||
AVFrame frame;
|
AVFrame frame;
|
||||||
} AascContext;
|
} AascContext;
|
||||||
|
|
||||||
#define FETCH_NEXT_STREAM_BYTE() \
|
|
||||||
if (stream_ptr >= buf_size) \
|
|
||||||
{ \
|
|
||||||
av_log(s->avctx, AV_LOG_ERROR, " AASC: stream ptr just went out of bounds (fetch)\n"); \
|
|
||||||
break; \
|
|
||||||
} \
|
|
||||||
stream_byte = buf[stream_ptr++];
|
|
||||||
|
|
||||||
static av_cold int aasc_decode_init(AVCodecContext *avctx)
|
static av_cold int aasc_decode_init(AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
AascContext *s = avctx->priv_data;
|
AascContext *s = avctx->priv_data;
|
||||||
@ -89,7 +82,8 @@ static int aasc_decode_frame(AVCodecContext *avctx,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
ff_msrle_decode(avctx, (AVPicture*)&s->frame, 8, buf - 4, buf_size + 4);
|
bytestream2_init(&s->gb, buf - 4, buf_size + 4);
|
||||||
|
ff_msrle_decode(avctx, (AVPicture*)&s->frame, 8, &s->gb);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
|
av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
|
||||||
|
@ -28,21 +28,19 @@ OBJS += arm/dsputil_init_arm.o \
|
|||||||
arm/mpegvideo_arm.o \
|
arm/mpegvideo_arm.o \
|
||||||
arm/simple_idct_arm.o \
|
arm/simple_idct_arm.o \
|
||||||
|
|
||||||
OBJS-$(HAVE_ARMV5TE) += arm/dsputil_init_armv5te.o \
|
ARMV5TE-OBJS += arm/dsputil_init_armv5te.o \
|
||||||
arm/mpegvideo_armv5te.o \
|
arm/mpegvideo_armv5te.o \
|
||||||
arm/mpegvideo_armv5te_s.o \
|
arm/mpegvideo_armv5te_s.o \
|
||||||
arm/simple_idct_armv5te.o \
|
arm/simple_idct_armv5te.o \
|
||||||
|
|
||||||
OBJS-$(HAVE_ARMV6) += arm/dsputil_init_armv6.o \
|
ARMV6-OBJS += arm/dsputil_init_armv6.o \
|
||||||
arm/dsputil_armv6.o \
|
arm/dsputil_armv6.o \
|
||||||
arm/simple_idct_armv6.o \
|
arm/simple_idct_armv6.o \
|
||||||
$(ARMV6-OBJS-yes)
|
|
||||||
|
|
||||||
VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o \
|
ARMVFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o
|
||||||
|
|
||||||
OBJS-$(HAVE_ARMVFP) += arm/dsputil_vfp.o \
|
ARMVFP-OBJS += arm/dsputil_vfp.o \
|
||||||
arm/dsputil_init_vfp.o \
|
arm/dsputil_init_vfp.o \
|
||||||
$(VFP-OBJS-yes)
|
|
||||||
|
|
||||||
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
|
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
|
||||||
arm/fft_fixed_neon.o \
|
arm/fft_fixed_neon.o \
|
||||||
@ -84,10 +82,9 @@ NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_neon.o \
|
|||||||
|
|
||||||
NEON-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_neon.o
|
NEON-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_neon.o
|
||||||
|
|
||||||
OBJS-$(HAVE_NEON) += arm/dsputil_init_neon.o \
|
NEON-OBJS += arm/dsputil_init_neon.o \
|
||||||
arm/dsputil_neon.o \
|
arm/dsputil_neon.o \
|
||||||
arm/fmtconvert_neon.o \
|
arm/fmtconvert_neon.o \
|
||||||
arm/int_neon.o \
|
arm/int_neon.o \
|
||||||
arm/mpegvideo_neon.o \
|
arm/mpegvideo_neon.o \
|
||||||
arm/simple_idct_neon.o \
|
arm/simple_idct_neon.o \
|
||||||
$(NEON-OBJS-yes)
|
|
||||||
|
@ -19,14 +19,15 @@
|
|||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "avcodec.h"
|
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "libavutil/avassert.h"
|
#include "libavutil/avassert.h"
|
||||||
#include "bytestream.h"
|
#include "bytestream.h"
|
||||||
|
#include "avcodec.h"
|
||||||
|
|
||||||
void av_destruct_packet_nofree(AVPacket *pkt)
|
void av_destruct_packet_nofree(AVPacket *pkt)
|
||||||
{
|
{
|
||||||
pkt->data = NULL; pkt->size = 0;
|
pkt->data = NULL;
|
||||||
|
pkt->size = 0;
|
||||||
pkt->side_data = NULL;
|
pkt->side_data = NULL;
|
||||||
pkt->side_data_elems = 0;
|
pkt->side_data_elems = 0;
|
||||||
}
|
}
|
||||||
@ -43,7 +44,8 @@ void ff_packet_free_side_data(AVPacket *pkt)
|
|||||||
void av_destruct_packet(AVPacket *pkt)
|
void av_destruct_packet(AVPacket *pkt)
|
||||||
{
|
{
|
||||||
av_free(pkt->data);
|
av_free(pkt->data);
|
||||||
pkt->data = NULL; pkt->size = 0;
|
pkt->data = NULL;
|
||||||
|
pkt->size = 0;
|
||||||
|
|
||||||
ff_packet_free_side_data(pkt);
|
ff_packet_free_side_data(pkt);
|
||||||
}
|
}
|
||||||
@ -83,7 +85,8 @@ int av_new_packet(AVPacket *pkt, int size)
|
|||||||
|
|
||||||
void av_shrink_packet(AVPacket *pkt, int size)
|
void av_shrink_packet(AVPacket *pkt, int size)
|
||||||
{
|
{
|
||||||
if (pkt->size <= size) return;
|
if (pkt->size <= size)
|
||||||
|
return;
|
||||||
pkt->size = size;
|
pkt->size = size;
|
||||||
memset(pkt->data + size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
|
memset(pkt->data + size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
|
||||||
}
|
}
|
||||||
@ -94,9 +97,11 @@ int av_grow_packet(AVPacket *pkt, int grow_by)
|
|||||||
av_assert0((unsigned)pkt->size <= INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE);
|
av_assert0((unsigned)pkt->size <= INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE);
|
||||||
if (!pkt->size)
|
if (!pkt->size)
|
||||||
return av_new_packet(pkt, grow_by);
|
return av_new_packet(pkt, grow_by);
|
||||||
if ((unsigned)grow_by > INT_MAX - (pkt->size + FF_INPUT_BUFFER_PADDING_SIZE))
|
if ((unsigned)grow_by >
|
||||||
|
INT_MAX - (pkt->size + FF_INPUT_BUFFER_PADDING_SIZE))
|
||||||
return -1;
|
return -1;
|
||||||
new_ptr = av_realloc(pkt->data, pkt->size + grow_by + FF_INPUT_BUFFER_PADDING_SIZE);
|
new_ptr = av_realloc(pkt->data,
|
||||||
|
pkt->size + grow_by + FF_INPUT_BUFFER_PADDING_SIZE);
|
||||||
if (!new_ptr)
|
if (!new_ptr)
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
pkt->data = new_ptr;
|
pkt->data = new_ptr;
|
||||||
@ -109,7 +114,8 @@ int av_grow_packet(AVPacket *pkt, int grow_by)
|
|||||||
do { \
|
do { \
|
||||||
void *data; \
|
void *data; \
|
||||||
if (padding) { \
|
if (padding) { \
|
||||||
if ((unsigned)(size) > (unsigned)(size) + FF_INPUT_BUFFER_PADDING_SIZE) \
|
if ((unsigned)(size) > \
|
||||||
|
(unsigned)(size) + FF_INPUT_BUFFER_PADDING_SIZE) \
|
||||||
goto failed_alloc; \
|
goto failed_alloc; \
|
||||||
data = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE); \
|
data = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE); \
|
||||||
} else { \
|
} else { \
|
||||||
@ -119,7 +125,8 @@ int av_grow_packet(AVPacket *pkt, int grow_by)
|
|||||||
goto failed_alloc; \
|
goto failed_alloc; \
|
||||||
memcpy(data, src, size); \
|
memcpy(data, src, size); \
|
||||||
if (padding) \
|
if (padding) \
|
||||||
memset((uint8_t*)data + size, 0, FF_INPUT_BUFFER_PADDING_SIZE); \
|
memset((uint8_t *)data + size, 0, \
|
||||||
|
FF_INPUT_BUFFER_PADDING_SIZE); \
|
||||||
dst = data; \
|
dst = data; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
@ -127,7 +134,8 @@ int av_dup_packet(AVPacket *pkt)
|
|||||||
{
|
{
|
||||||
AVPacket tmp_pkt;
|
AVPacket tmp_pkt;
|
||||||
|
|
||||||
if (((pkt->destruct == av_destruct_packet_nofree) || (pkt->destruct == NULL)) && pkt->data) {
|
if (((pkt->destruct == av_destruct_packet_nofree) ||
|
||||||
|
(pkt->destruct == NULL)) && pkt->data) {
|
||||||
tmp_pkt = *pkt;
|
tmp_pkt = *pkt;
|
||||||
|
|
||||||
pkt->data = NULL;
|
pkt->data = NULL;
|
||||||
@ -140,14 +148,15 @@ int av_dup_packet(AVPacket *pkt)
|
|||||||
|
|
||||||
DUP_DATA(pkt->side_data, tmp_pkt.side_data,
|
DUP_DATA(pkt->side_data, tmp_pkt.side_data,
|
||||||
pkt->side_data_elems * sizeof(*pkt->side_data), 0);
|
pkt->side_data_elems * sizeof(*pkt->side_data), 0);
|
||||||
memset(pkt->side_data, 0, pkt->side_data_elems * sizeof(*pkt->side_data));
|
memset(pkt->side_data, 0,
|
||||||
for (i = 0; i < pkt->side_data_elems; i++) {
|
pkt->side_data_elems * sizeof(*pkt->side_data));
|
||||||
|
for (i = 0; i < pkt->side_data_elems; i++)
|
||||||
DUP_DATA(pkt->side_data[i].data, tmp_pkt.side_data[i].data,
|
DUP_DATA(pkt->side_data[i].data, tmp_pkt.side_data[i].data,
|
||||||
pkt->side_data[i].size, 1);
|
pkt->side_data[i].size, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
failed_alloc:
|
failed_alloc:
|
||||||
av_destruct_packet(pkt);
|
av_destruct_packet(pkt);
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
@ -156,8 +165,10 @@ failed_alloc:
|
|||||||
void av_free_packet(AVPacket *pkt)
|
void av_free_packet(AVPacket *pkt)
|
||||||
{
|
{
|
||||||
if (pkt) {
|
if (pkt) {
|
||||||
if (pkt->destruct) pkt->destruct(pkt);
|
if (pkt->destruct)
|
||||||
pkt->data = NULL; pkt->size = 0;
|
pkt->destruct(pkt);
|
||||||
|
pkt->data = NULL;
|
||||||
|
pkt->size = 0;
|
||||||
pkt->side_data = NULL;
|
pkt->side_data = NULL;
|
||||||
pkt->side_data_elems = 0;
|
pkt->side_data_elems = 0;
|
||||||
}
|
}
|
||||||
@ -173,7 +184,8 @@ uint8_t* av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
|
|||||||
if ((unsigned)size > INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE)
|
if ((unsigned)size > INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
pkt->side_data = av_realloc(pkt->side_data, (elems + 1) * sizeof(*pkt->side_data));
|
pkt->side_data = av_realloc(pkt->side_data,
|
||||||
|
(elems + 1) * sizeof(*pkt->side_data));
|
||||||
if (!pkt->side_data)
|
if (!pkt->side_data)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -84,8 +84,7 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
int shift = 16;
|
int shift = 16;
|
||||||
*pal = 0xFF << 24;
|
*pal = 0xFF << 24;
|
||||||
for (j = 0; j < 3; j++, shift -= 8)
|
for (j = 0; j < 3; j++, shift -= 8)
|
||||||
*pal +=
|
*pal += ((avctx->extradata[i * 3 + j] << 2) |
|
||||||
((avctx->extradata[i * 3 + j] << 2) |
|
|
||||||
(avctx->extradata[i * 3 + j] >> 4)) << shift;
|
(avctx->extradata[i * 3 + j] >> 4)) << shift;
|
||||||
pal++;
|
pal++;
|
||||||
}
|
}
|
||||||
@ -132,8 +131,7 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
switch (code) {
|
switch (code) {
|
||||||
|
case 0: // normal chain
|
||||||
case 0: //Normal Chain
|
|
||||||
if (length >= bytestream2_get_bytes_left(&g)) {
|
if (length >= bytestream2_get_bytes_left(&g)) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "Frame larger than buffer.\n");
|
av_log(avctx, AV_LOG_ERROR, "Frame larger than buffer.\n");
|
||||||
return -1;
|
return -1;
|
||||||
@ -141,8 +139,7 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
bytestream2_get_buffer(&g, dst, length);
|
bytestream2_get_buffer(&g, dst, length);
|
||||||
dst += length;
|
dst += length;
|
||||||
break;
|
break;
|
||||||
|
case 1: // back chain
|
||||||
case 1: //Back Chain
|
|
||||||
dst_offset = dst - offset;
|
dst_offset = dst - offset;
|
||||||
length *= 4; // Convert dwords to bytes.
|
length *= 4; // Convert dwords to bytes.
|
||||||
if (dst_offset < bfi->dst)
|
if (dst_offset < bfi->dst)
|
||||||
@ -150,12 +147,10 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
while (length--)
|
while (length--)
|
||||||
*dst++ = *dst_offset++;
|
*dst++ = *dst_offset++;
|
||||||
break;
|
break;
|
||||||
|
case 2: // skip chain
|
||||||
case 2: //Skip Chain
|
|
||||||
dst += length;
|
dst += length;
|
||||||
break;
|
break;
|
||||||
|
case 3: // fill chain
|
||||||
case 3: //Fill Chain
|
|
||||||
colour1 = bytestream2_get_byte(&g);
|
colour1 = bytestream2_get_byte(&g);
|
||||||
colour2 = bytestream2_get_byte(&g);
|
colour2 = bytestream2_get_byte(&g);
|
||||||
while (length--) {
|
while (length--) {
|
||||||
@ -163,7 +158,6 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
*dst++ = colour2;
|
*dst++ = colour2;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,10 +25,8 @@
|
|||||||
* @author Thilo Borgmann <thilo.borgmann _at_ googlemail.com>
|
* @author Thilo Borgmann <thilo.borgmann _at_ googlemail.com>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "bgmc.h"
|
#include "bgmc.h"
|
||||||
|
|
||||||
|
|
||||||
#define FREQ_BITS 14 // bits used by frequency counters
|
#define FREQ_BITS 14 // bits used by frequency counters
|
||||||
#define VALUE_BITS 18 // bits used to represent the values
|
#define VALUE_BITS 18 // bits used to represent the values
|
||||||
#define TOP_VALUE ((1 << VALUE_BITS) - 1) // maximum value
|
#define TOP_VALUE ((1 << VALUE_BITS) - 1) // maximum value
|
||||||
@ -41,8 +39,7 @@
|
|||||||
#define LUT_BUFF 4 // number of buffered lookup tables
|
#define LUT_BUFF 4 // number of buffered lookup tables
|
||||||
|
|
||||||
|
|
||||||
/** Cumulative frequency tables for block Gilbert-Moore coding.
|
/** Cumulative frequency tables for block Gilbert-Moore coding. */
|
||||||
*/
|
|
||||||
static const uint16_t cf_tables_1[3][129] = {
|
static const uint16_t cf_tables_1[3][129] = {
|
||||||
{
|
{
|
||||||
16384, 16066, 15748, 15431, 15114, 14799, 14485, 14173, 13861, 13552,
|
16384, 16066, 15748, 15431, 15114, 14799, 14485, 14173, 13861, 13552,
|
||||||
@ -424,10 +421,8 @@ static const uint16_t * const cf_table[16] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/** Initialize a given lookup table using a given delta
|
/** Initialize a given lookup table using a given delta */
|
||||||
*/
|
static void bgmc_lut_fillp(uint8_t *lut, int *lut_status, int delta)
|
||||||
static void bgmc_lut_fillp(uint8_t *lut, int *lut_status,
|
|
||||||
int delta)
|
|
||||||
{
|
{
|
||||||
unsigned int sx, i;
|
unsigned int sx, i;
|
||||||
|
|
||||||
@ -446,10 +441,8 @@ static void bgmc_lut_fillp(uint8_t *lut, int *lut_status,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Retune the index of a suitable lookup table for a given delta
|
/** Retune the index of a suitable lookup table for a given delta */
|
||||||
*/
|
static uint8_t *bgmc_lut_getp(uint8_t *lut, int *lut_status, int delta)
|
||||||
static uint8_t* bgmc_lut_getp(uint8_t *lut, int *lut_status,
|
|
||||||
int delta)
|
|
||||||
{
|
{
|
||||||
unsigned int i = av_clip(delta, 0, LUT_BUFF - 1);
|
unsigned int i = av_clip(delta, 0, LUT_BUFF - 1);
|
||||||
|
|
||||||
@ -462,8 +455,7 @@ static uint8_t* bgmc_lut_getp(uint8_t *lut, int *lut_status,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Initialize the lookup table arrays
|
/** Initialize the lookup table arrays */
|
||||||
*/
|
|
||||||
int ff_bgmc_init(AVCodecContext *avctx, uint8_t **cf_lut, int **cf_lut_status)
|
int ff_bgmc_init(AVCodecContext *avctx, uint8_t **cf_lut, int **cf_lut_status)
|
||||||
{
|
{
|
||||||
*cf_lut = av_malloc(sizeof(*cf_lut) * LUT_BUFF * 16 * LUT_SIZE);
|
*cf_lut = av_malloc(sizeof(*cf_lut) * LUT_BUFF * 16 * LUT_SIZE);
|
||||||
@ -474,8 +466,7 @@ int ff_bgmc_init(AVCodecContext *avctx, uint8_t **cf_lut, int **cf_lut_status)
|
|||||||
av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
|
av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
} else {
|
} else {
|
||||||
// initialize lut_status buffer to a value never used to compare
|
// initialize lut_status buffer to a value never used to compare against
|
||||||
// against
|
|
||||||
memset(*cf_lut_status, -1, sizeof(*cf_lut_status) * LUT_BUFF);
|
memset(*cf_lut_status, -1, sizeof(*cf_lut_status) * LUT_BUFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -483,8 +474,7 @@ int ff_bgmc_init(AVCodecContext *avctx, uint8_t **cf_lut, int **cf_lut_status)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Release the lookup table arrays
|
/** Release the lookup table arrays */
|
||||||
*/
|
|
||||||
void ff_bgmc_end(uint8_t **cf_lut, int **cf_lut_status)
|
void ff_bgmc_end(uint8_t **cf_lut, int **cf_lut_status)
|
||||||
{
|
{
|
||||||
av_freep(cf_lut);
|
av_freep(cf_lut);
|
||||||
@ -492,10 +482,9 @@ void ff_bgmc_end(uint8_t **cf_lut, int **cf_lut_status)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Initialize decoding and reads the first value
|
/** Initialize decoding and reads the first value */
|
||||||
*/
|
void ff_bgmc_decode_init(GetBitContext *gb, unsigned int *h, unsigned int *l,
|
||||||
void ff_bgmc_decode_init(GetBitContext *gb,
|
unsigned int *v)
|
||||||
unsigned int *h, unsigned int *l, unsigned int *v)
|
|
||||||
{
|
{
|
||||||
*h = TOP_VALUE;
|
*h = TOP_VALUE;
|
||||||
*l = 0;
|
*l = 0;
|
||||||
@ -503,16 +492,14 @@ void ff_bgmc_decode_init(GetBitContext *gb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Finish decoding
|
/** Finish decoding */
|
||||||
*/
|
|
||||||
void ff_bgmc_decode_end(GetBitContext *gb)
|
void ff_bgmc_decode_end(GetBitContext *gb)
|
||||||
{
|
{
|
||||||
skip_bits_long(gb, -(VALUE_BITS - 2));
|
skip_bits_long(gb, -(VALUE_BITS - 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Read and decode a block Gilbert-Moore coded symbol
|
/** Read and decode a block Gilbert-Moore coded symbol */
|
||||||
*/
|
|
||||||
void ff_bgmc_decode(GetBitContext *gb, unsigned int num, int32_t *dst,
|
void ff_bgmc_decode(GetBitContext *gb, unsigned int num, int32_t *dst,
|
||||||
int delta, unsigned int sx,
|
int delta, unsigned int sx,
|
||||||
unsigned int *h, unsigned int *l, unsigned int *v,
|
unsigned int *h, unsigned int *l, unsigned int *v,
|
||||||
@ -552,7 +539,8 @@ void ff_bgmc_decode(GetBitContext *gb, unsigned int num, int32_t *dst,
|
|||||||
value -= FIRST_QTR;
|
value -= FIRST_QTR;
|
||||||
low -= FIRST_QTR;
|
low -= FIRST_QTR;
|
||||||
high -= FIRST_QTR;
|
high -= FIRST_QTR;
|
||||||
} else break;
|
} else
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
low *= 2;
|
low *= 2;
|
||||||
|
@ -53,6 +53,7 @@ static int bmp_decode_frame(AVCodecContext *avctx,
|
|||||||
uint8_t *ptr;
|
uint8_t *ptr;
|
||||||
int dsize;
|
int dsize;
|
||||||
const uint8_t *buf0 = buf;
|
const uint8_t *buf0 = buf;
|
||||||
|
GetByteContext gb;
|
||||||
|
|
||||||
if(buf_size < 14){
|
if(buf_size < 14){
|
||||||
av_log(avctx, AV_LOG_ERROR, "buf size too small (%d)\n", buf_size);
|
av_log(avctx, AV_LOG_ERROR, "buf size too small (%d)\n", buf_size);
|
||||||
@ -269,7 +270,8 @@ static int bmp_decode_frame(AVCodecContext *avctx,
|
|||||||
p->data[0] += p->linesize[0] * (avctx->height - 1);
|
p->data[0] += p->linesize[0] * (avctx->height - 1);
|
||||||
p->linesize[0] = -p->linesize[0];
|
p->linesize[0] = -p->linesize[0];
|
||||||
}
|
}
|
||||||
ff_msrle_decode(avctx, (AVPicture*)p, depth, buf, dsize);
|
bytestream2_init(&gb, buf, dsize);
|
||||||
|
ff_msrle_decode(avctx, (AVPicture*)p, depth, &gb);
|
||||||
if(height < 0){
|
if(height < 0){
|
||||||
p->data[0] += p->linesize[0] * (avctx->height - 1);
|
p->data[0] += p->linesize[0] * (avctx->height - 1);
|
||||||
p->linesize[0] = -p->linesize[0];
|
p->linesize[0] = -p->linesize[0];
|
||||||
|
@ -105,10 +105,6 @@ float ff_xvid_rate_estimate_qscale(MpegEncContext *s, int dry_run){
|
|||||||
xvid_plg_data.bquant_offset = 0; // 100 * s->avctx->b_quant_offset;
|
xvid_plg_data.bquant_offset = 0; // 100 * s->avctx->b_quant_offset;
|
||||||
xvid_plg_data.bquant_ratio = 100; // * s->avctx->b_quant_factor;
|
xvid_plg_data.bquant_ratio = 100; // * s->avctx->b_quant_factor;
|
||||||
|
|
||||||
#if 0
|
|
||||||
xvid_plg_data.stats.hlength= X
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if(!s->rc_context.dry_run_qscale){
|
if(!s->rc_context.dry_run_qscale){
|
||||||
if(s->picture_number){
|
if(s->picture_number){
|
||||||
xvid_plg_data.length=
|
xvid_plg_data.length=
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
OBJS-$(HAVE_MMI) += mips/dsputil_mmi.o \
|
MMI-OBJS += mips/dsputil_mmi.o \
|
||||||
mips/idct_mmi.o \
|
mips/idct_mmi.o \
|
||||||
mips/mpegvideo_mmi.o \
|
mips/mpegvideo_mmi.o \
|
||||||
|
@ -40,6 +40,7 @@ typedef struct MsrleContext {
|
|||||||
AVCodecContext *avctx;
|
AVCodecContext *avctx;
|
||||||
AVFrame frame;
|
AVFrame frame;
|
||||||
|
|
||||||
|
GetByteContext gb;
|
||||||
const unsigned char *buf;
|
const unsigned char *buf;
|
||||||
int size;
|
int size;
|
||||||
|
|
||||||
@ -127,7 +128,8 @@ static int msrle_decode_frame(AVCodecContext *avctx,
|
|||||||
ptr += s->frame.linesize[0];
|
ptr += s->frame.linesize[0];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ff_msrle_decode(avctx, (AVPicture*)&s->frame, avctx->bits_per_coded_sample, buf, buf_size);
|
bytestream2_init(&s->gb, buf, buf_size);
|
||||||
|
ff_msrle_decode(avctx, (AVPicture*)&s->frame, avctx->bits_per_coded_sample, &s->gb);
|
||||||
}
|
}
|
||||||
|
|
||||||
*data_size = sizeof(AVFrame);
|
*data_size = sizeof(AVFrame);
|
||||||
|
@ -30,18 +30,9 @@
|
|||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "msrledec.h"
|
#include "msrledec.h"
|
||||||
|
|
||||||
#define FETCH_NEXT_STREAM_BYTE() \
|
|
||||||
if (stream_ptr >= data_size) \
|
|
||||||
{ \
|
|
||||||
av_log(avctx, AV_LOG_ERROR, " MS RLE: stream ptr just went out of bounds (1)\n"); \
|
|
||||||
return -1; \
|
|
||||||
} \
|
|
||||||
stream_byte = data[stream_ptr++];
|
|
||||||
|
|
||||||
static int msrle_decode_pal4(AVCodecContext *avctx, AVPicture *pic,
|
static int msrle_decode_pal4(AVCodecContext *avctx, AVPicture *pic,
|
||||||
const uint8_t *data, int data_size)
|
GetByteContext *gb)
|
||||||
{
|
{
|
||||||
int stream_ptr = 0;
|
|
||||||
unsigned char rle_code;
|
unsigned char rle_code;
|
||||||
unsigned char extra_byte, odd_pixel;
|
unsigned char extra_byte, odd_pixel;
|
||||||
unsigned char stream_byte;
|
unsigned char stream_byte;
|
||||||
@ -52,11 +43,16 @@ static int msrle_decode_pal4(AVCodecContext *avctx, AVPicture *pic,
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
while (row_ptr >= 0) {
|
while (row_ptr >= 0) {
|
||||||
FETCH_NEXT_STREAM_BYTE();
|
if (bytestream2_get_bytes_left(gb) <= 0) {
|
||||||
rle_code = stream_byte;
|
av_log(avctx, AV_LOG_ERROR,
|
||||||
|
"MS RLE: bytestream overrun, %d rows left\n",
|
||||||
|
row_ptr);
|
||||||
|
return AVERROR_INVALIDDATA;
|
||||||
|
}
|
||||||
|
rle_code = stream_byte = bytestream2_get_byteu(gb);
|
||||||
if (rle_code == 0) {
|
if (rle_code == 0) {
|
||||||
/* fetch the next byte to see how to handle escape code */
|
/* fetch the next byte to see how to handle escape code */
|
||||||
FETCH_NEXT_STREAM_BYTE();
|
stream_byte = bytestream2_get_byte(gb);
|
||||||
if (stream_byte == 0) {
|
if (stream_byte == 0) {
|
||||||
/* line is done, goto the next one */
|
/* line is done, goto the next one */
|
||||||
row_ptr -= row_dec;
|
row_ptr -= row_dec;
|
||||||
@ -66,24 +62,26 @@ static int msrle_decode_pal4(AVCodecContext *avctx, AVPicture *pic,
|
|||||||
return 0;
|
return 0;
|
||||||
} else if (stream_byte == 2) {
|
} else if (stream_byte == 2) {
|
||||||
/* reposition frame decode coordinates */
|
/* reposition frame decode coordinates */
|
||||||
FETCH_NEXT_STREAM_BYTE();
|
stream_byte = bytestream2_get_byte(gb);
|
||||||
pixel_ptr += stream_byte;
|
pixel_ptr += stream_byte;
|
||||||
FETCH_NEXT_STREAM_BYTE();
|
stream_byte = bytestream2_get_byte(gb);
|
||||||
row_ptr -= stream_byte * row_dec;
|
row_ptr -= stream_byte * row_dec;
|
||||||
} else {
|
} else {
|
||||||
// copy pixels from encoded stream
|
// copy pixels from encoded stream
|
||||||
odd_pixel = stream_byte & 1;
|
odd_pixel = stream_byte & 1;
|
||||||
rle_code = (stream_byte + 1) / 2;
|
rle_code = (stream_byte + 1) / 2;
|
||||||
extra_byte = rle_code & 0x01;
|
extra_byte = rle_code & 0x01;
|
||||||
if (row_ptr + pixel_ptr + stream_byte > frame_size) {
|
if (row_ptr + pixel_ptr + stream_byte > frame_size ||
|
||||||
av_log(avctx, AV_LOG_ERROR, " MS RLE: frame ptr just went out of bounds (1)\n");
|
bytestream2_get_bytes_left(gb) < rle_code) {
|
||||||
return -1;
|
av_log(avctx, AV_LOG_ERROR,
|
||||||
|
"MS RLE: frame/stream ptr just went out of bounds (copy)\n");
|
||||||
|
return AVERROR_INVALIDDATA;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < rle_code; i++) {
|
for (i = 0; i < rle_code; i++) {
|
||||||
if (pixel_ptr >= avctx->width)
|
if (pixel_ptr >= avctx->width)
|
||||||
break;
|
break;
|
||||||
FETCH_NEXT_STREAM_BYTE();
|
stream_byte = bytestream2_get_byteu(gb);
|
||||||
pic->data[0][row_ptr + pixel_ptr] = stream_byte >> 4;
|
pic->data[0][row_ptr + pixel_ptr] = stream_byte >> 4;
|
||||||
pixel_ptr++;
|
pixel_ptr++;
|
||||||
if (i + 1 == rle_code && odd_pixel)
|
if (i + 1 == rle_code && odd_pixel)
|
||||||
@ -96,15 +94,16 @@ static int msrle_decode_pal4(AVCodecContext *avctx, AVPicture *pic,
|
|||||||
|
|
||||||
// if the RLE code is odd, skip a byte in the stream
|
// if the RLE code is odd, skip a byte in the stream
|
||||||
if (extra_byte)
|
if (extra_byte)
|
||||||
stream_ptr++;
|
bytestream2_skip(gb, 1);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// decode a run of data
|
// decode a run of data
|
||||||
if (row_ptr + pixel_ptr + stream_byte > frame_size) {
|
if (row_ptr + pixel_ptr + stream_byte > frame_size) {
|
||||||
av_log(avctx, AV_LOG_ERROR, " MS RLE: frame ptr just went out of bounds (1)\n");
|
av_log(avctx, AV_LOG_ERROR,
|
||||||
return -1;
|
"MS RLE: frame ptr just went out of bounds (run)\n");
|
||||||
|
return AVERROR_INVALIDDATA;
|
||||||
}
|
}
|
||||||
FETCH_NEXT_STREAM_BYTE();
|
stream_byte = bytestream2_get_byte(gb);
|
||||||
for (i = 0; i < rle_code; i++) {
|
for (i = 0; i < rle_code; i++) {
|
||||||
if (pixel_ptr >= avctx->width)
|
if (pixel_ptr >= avctx->width)
|
||||||
break;
|
break;
|
||||||
@ -118,21 +117,21 @@ static int msrle_decode_pal4(AVCodecContext *avctx, AVPicture *pic,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* one last sanity check on the way out */
|
/* one last sanity check on the way out */
|
||||||
if (stream_ptr < data_size) {
|
if (bytestream2_get_bytes_left(gb)) {
|
||||||
av_log(avctx, AV_LOG_ERROR, " MS RLE: ended frame decode with bytes left over (%d < %d)\n",
|
av_log(avctx, AV_LOG_ERROR,
|
||||||
stream_ptr, data_size);
|
"MS RLE: ended frame decode with %d bytes left over\n",
|
||||||
return -1;
|
bytestream2_get_bytes_left(gb));
|
||||||
|
return AVERROR_INVALIDDATA;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic, int depth,
|
static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic,
|
||||||
const uint8_t *data, int srcsize)
|
int depth, GetByteContext *gb)
|
||||||
{
|
{
|
||||||
uint8_t *output, *output_end;
|
uint8_t *output, *output_end;
|
||||||
const uint8_t* src = data;
|
|
||||||
int p1, p2, line=avctx->height - 1, pos=0, i;
|
int p1, p2, line=avctx->height - 1, pos=0, i;
|
||||||
uint16_t pix16;
|
uint16_t pix16;
|
||||||
uint32_t pix32;
|
uint32_t pix32;
|
||||||
@ -140,23 +139,29 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic, int de
|
|||||||
|
|
||||||
output = pic->data[0] + (avctx->height - 1) * pic->linesize[0];
|
output = pic->data[0] + (avctx->height - 1) * pic->linesize[0];
|
||||||
output_end = pic->data[0] + avctx->height * pic->linesize[0];
|
output_end = pic->data[0] + avctx->height * pic->linesize[0];
|
||||||
while(src + 1 < data + srcsize) {
|
while (bytestream2_get_bytes_left(gb) > 0) {
|
||||||
p1 = *src++;
|
p1 = bytestream2_get_byteu(gb);
|
||||||
if(p1 == 0) { //Escape code
|
if(p1 == 0) { //Escape code
|
||||||
p2 = *src++;
|
p2 = bytestream2_get_byte(gb);
|
||||||
if(p2 == 0) { //End-of-line
|
if(p2 == 0) { //End-of-line
|
||||||
output = pic->data[0] + (--line) * pic->linesize[0];
|
output = pic->data[0] + (--line) * pic->linesize[0];
|
||||||
if (line < 0 && !(src+1 < data + srcsize && AV_RB16(src) == 1)) {
|
if (line < 0) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "Next line is beyond picture bounds\n");
|
if (bytestream2_get_be16(gb) == 1) { // end-of-picture
|
||||||
return -1;
|
return 0;
|
||||||
|
} else {
|
||||||
|
av_log(avctx, AV_LOG_ERROR,
|
||||||
|
"Next line is beyond picture bounds (%d bytes left)\n",
|
||||||
|
bytestream2_get_bytes_left(gb));
|
||||||
|
return AVERROR_INVALIDDATA;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pos = 0;
|
pos = 0;
|
||||||
continue;
|
continue;
|
||||||
} else if(p2 == 1) { //End-of-picture
|
} else if(p2 == 1) { //End-of-picture
|
||||||
return 0;
|
return 0;
|
||||||
} else if(p2 == 2) { //Skip
|
} else if(p2 == 2) { //Skip
|
||||||
p1 = *src++;
|
p1 = bytestream2_get_byte(gb);
|
||||||
p2 = *src++;
|
p2 = bytestream2_get_byte(gb);
|
||||||
line -= p2;
|
line -= p2;
|
||||||
pos += p1;
|
pos += p1;
|
||||||
if (line < 0 || pos >= width){
|
if (line < 0 || pos >= width){
|
||||||
@ -167,35 +172,31 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic, int de
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Copy data
|
// Copy data
|
||||||
if ((pic->linesize[0] > 0 && output + p2 * (depth >> 3) > output_end)
|
if ((pic->linesize[0] > 0 && output + p2 * (depth >> 3) > output_end) ||
|
||||||
||(pic->linesize[0] < 0 && output + p2 * (depth >> 3) < output_end)) {
|
(pic->linesize[0] < 0 && output + p2 * (depth >> 3) < output_end)) {
|
||||||
src += p2 * (depth >> 3);
|
bytestream2_skip(gb, 2 * (depth >> 3));
|
||||||
continue;
|
continue;
|
||||||
|
} else if (bytestream2_get_bytes_left(gb) < p2 * (depth >> 3)) {
|
||||||
|
av_log(avctx, AV_LOG_ERROR, "bytestream overrun\n");
|
||||||
|
return AVERROR_INVALIDDATA;
|
||||||
}
|
}
|
||||||
if(data + srcsize - src < p2 * (depth >> 3)){
|
|
||||||
av_log(avctx, AV_LOG_ERROR, "Copy beyond input buffer\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if ((depth == 8) || (depth == 24)) {
|
if ((depth == 8) || (depth == 24)) {
|
||||||
for(i = 0; i < p2 * (depth >> 3); i++) {
|
for(i = 0; i < p2 * (depth >> 3); i++) {
|
||||||
*output++ = *src++;
|
*output++ = bytestream2_get_byteu(gb);
|
||||||
}
|
}
|
||||||
// RLE8 copy is actually padded - and runs are not!
|
// RLE8 copy is actually padded - and runs are not!
|
||||||
if(depth == 8 && (p2 & 1)) {
|
if(depth == 8 && (p2 & 1)) {
|
||||||
src++;
|
bytestream2_skip(gb, 1);
|
||||||
}
|
}
|
||||||
} else if (depth == 16) {
|
} else if (depth == 16) {
|
||||||
for(i = 0; i < p2; i++) {
|
for(i = 0; i < p2; i++) {
|
||||||
pix16 = AV_RL16(src);
|
*(uint16_t*)output = bytestream2_get_le16u(gb);
|
||||||
src += 2;
|
|
||||||
*(uint16_t*)output = pix16;
|
|
||||||
output += 2;
|
output += 2;
|
||||||
}
|
}
|
||||||
} else if (depth == 32) {
|
} else if (depth == 32) {
|
||||||
for(i = 0; i < p2; i++) {
|
for(i = 0; i < p2; i++) {
|
||||||
pix32 = AV_RL32(src);
|
*(uint32_t*)output = bytestream2_get_le32u(gb);
|
||||||
src += 4;
|
|
||||||
*(uint32_t*)output = pix32;
|
|
||||||
output += 4;
|
output += 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -203,21 +204,19 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic, int de
|
|||||||
} else { //run of pixels
|
} else { //run of pixels
|
||||||
uint8_t pix[3]; //original pixel
|
uint8_t pix[3]; //original pixel
|
||||||
switch(depth){
|
switch(depth){
|
||||||
case 8: pix[0] = *src++;
|
case 8: pix[0] = bytestream2_get_byte(gb);
|
||||||
break;
|
break;
|
||||||
case 16: pix16 = AV_RL16(src);
|
case 16: pix16 = bytestream2_get_le16(gb);
|
||||||
src += 2;
|
|
||||||
break;
|
break;
|
||||||
case 24: pix[0] = *src++;
|
case 24: pix[0] = bytestream2_get_byte(gb);
|
||||||
pix[1] = *src++;
|
pix[1] = bytestream2_get_byte(gb);
|
||||||
pix[2] = *src++;
|
pix[2] = bytestream2_get_byte(gb);
|
||||||
break;
|
break;
|
||||||
case 32: pix32 = AV_RL32(src);
|
case 32: pix32 = bytestream2_get_le32(gb);
|
||||||
src += 4;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if ((pic->linesize[0] > 0 && output + p1 * (depth >> 3) > output_end)
|
if ((pic->linesize[0] > 0 && output + p1 * (depth >> 3) > output_end) ||
|
||||||
||(pic->linesize[0] < 0 && output + p1 * (depth >> 3) < output_end))
|
(pic->linesize[0] < 0 && output + p1 * (depth >> 3) < output_end))
|
||||||
continue;
|
continue;
|
||||||
for(i = 0; i < p1; i++) {
|
for(i = 0; i < p1; i++) {
|
||||||
switch(depth){
|
switch(depth){
|
||||||
@ -244,17 +243,17 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic, int de
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int ff_msrle_decode(AVCodecContext *avctx, AVPicture *pic, int depth,
|
int ff_msrle_decode(AVCodecContext *avctx, AVPicture *pic,
|
||||||
const uint8_t* data, int data_size)
|
int depth, GetByteContext *gb)
|
||||||
{
|
{
|
||||||
switch(depth){
|
switch(depth){
|
||||||
case 4:
|
case 4:
|
||||||
return msrle_decode_pal4(avctx, pic, data, data_size);
|
return msrle_decode_pal4(avctx, pic, gb);
|
||||||
case 8:
|
case 8:
|
||||||
case 16:
|
case 16:
|
||||||
case 24:
|
case 24:
|
||||||
case 32:
|
case 32:
|
||||||
return msrle_decode_8_16_24_32(avctx, pic, depth, data, data_size);
|
return msrle_decode_8_16_24_32(avctx, pic, depth, gb);
|
||||||
default:
|
default:
|
||||||
av_log(avctx, AV_LOG_ERROR, "Unknown depth %d\n", depth);
|
av_log(avctx, AV_LOG_ERROR, "Unknown depth %d\n", depth);
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#define AVCODEC_MSRLEDEC_H
|
#define AVCODEC_MSRLEDEC_H
|
||||||
|
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
|
#include "bytestream.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decode stream in MS RLE format into frame.
|
* Decode stream in MS RLE format into frame.
|
||||||
@ -30,10 +31,9 @@
|
|||||||
* @param avctx codec context
|
* @param avctx codec context
|
||||||
* @param pic destination frame
|
* @param pic destination frame
|
||||||
* @param depth bit depth
|
* @param depth bit depth
|
||||||
* @param data input stream
|
* @param gb input bytestream context
|
||||||
* @param data_size input size
|
|
||||||
*/
|
*/
|
||||||
int ff_msrle_decode(AVCodecContext *avctx, AVPicture *pic, int depth,
|
int ff_msrle_decode(AVCodecContext *avctx, AVPicture *pic,
|
||||||
const uint8_t* data, int data_size);
|
int depth, GetByteContext *gb);
|
||||||
|
|
||||||
#endif /* AVCODEC_MSRLEDEC_H */
|
#endif /* AVCODEC_MSRLEDEC_H */
|
||||||
|
@ -11,7 +11,7 @@ ALTIVEC-OBJS-$(CONFIG_VP5_DECODER) += ppc/vp3dsp_altivec.o
|
|||||||
ALTIVEC-OBJS-$(CONFIG_VP6_DECODER) += ppc/vp3dsp_altivec.o
|
ALTIVEC-OBJS-$(CONFIG_VP6_DECODER) += ppc/vp3dsp_altivec.o
|
||||||
ALTIVEC-OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o
|
ALTIVEC-OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o
|
||||||
|
|
||||||
OBJS-$(HAVE_ALTIVEC) += ppc/dsputil_altivec.o \
|
ALTIVEC-OBJS += ppc/dsputil_altivec.o \
|
||||||
ppc/fdct_altivec.o \
|
ppc/fdct_altivec.o \
|
||||||
ppc/float_altivec.o \
|
ppc/float_altivec.o \
|
||||||
ppc/fmtconvert_altivec.o \
|
ppc/fmtconvert_altivec.o \
|
||||||
@ -19,4 +19,3 @@ OBJS-$(HAVE_ALTIVEC) += ppc/dsputil_altivec.o \
|
|||||||
ppc/idct_altivec.o \
|
ppc/idct_altivec.o \
|
||||||
ppc/int_altivec.o \
|
ppc/int_altivec.o \
|
||||||
ppc/mpegvideo_altivec.o \
|
ppc/mpegvideo_altivec.o \
|
||||||
$(ALTIVEC-OBJS-yes)
|
|
||||||
|
@ -69,8 +69,6 @@ do { \
|
|||||||
|
|
||||||
#define SB_DITHERING_NOISE(sb,noise_idx) (noise_table[(noise_idx)++] * sb_noise_attenuation[(sb)])
|
#define SB_DITHERING_NOISE(sb,noise_idx) (noise_table[(noise_idx)++] * sb_noise_attenuation[(sb)])
|
||||||
|
|
||||||
#define BITS_LEFT(length,gb) ((length) - get_bits_count ((gb)))
|
|
||||||
|
|
||||||
#define SAMPLES_NEEDED \
|
#define SAMPLES_NEEDED \
|
||||||
av_log (NULL,AV_LOG_INFO,"This file triggers some untested code. Please contact the developers.\n");
|
av_log (NULL,AV_LOG_INFO,"This file triggers some untested code. Please contact the developers.\n");
|
||||||
|
|
||||||
@ -202,8 +200,6 @@ typedef struct {
|
|||||||
} QDM2Context;
|
} QDM2Context;
|
||||||
|
|
||||||
|
|
||||||
static uint8_t empty_buffer[FF_INPUT_BUFFER_PADDING_SIZE];
|
|
||||||
|
|
||||||
static VLC vlc_tab_level;
|
static VLC vlc_tab_level;
|
||||||
static VLC vlc_tab_diff;
|
static VLC vlc_tab_diff;
|
||||||
static VLC vlc_tab_run;
|
static VLC vlc_tab_run;
|
||||||
@ -503,7 +499,7 @@ static void fix_coding_method_array (int sb, int channels, sb_int8_array coding_
|
|||||||
int j,k;
|
int j,k;
|
||||||
int ch;
|
int ch;
|
||||||
int run, case_val;
|
int run, case_val;
|
||||||
int switchtable[23] = {0,5,1,5,5,5,5,5,2,5,5,5,5,5,5,5,3,5,5,5,5,5,4};
|
static const int switchtable[23] = {0,5,1,5,5,5,5,5,2,5,5,5,5,5,5,5,3,5,5,5,5,5,4};
|
||||||
|
|
||||||
for (ch = 0; ch < channels; ch++) {
|
for (ch = 0; ch < channels; ch++) {
|
||||||
for (j = 0; j < 64; ) {
|
for (j = 0; j < 64; ) {
|
||||||
@ -793,10 +789,10 @@ static int synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int le
|
|||||||
else if (sb >= 24)
|
else if (sb >= 24)
|
||||||
joined_stereo = 1;
|
joined_stereo = 1;
|
||||||
else
|
else
|
||||||
joined_stereo = (BITS_LEFT(length,gb) >= 1) ? get_bits1 (gb) : 0;
|
joined_stereo = (get_bits_left(gb) >= 1) ? get_bits1 (gb) : 0;
|
||||||
|
|
||||||
if (joined_stereo) {
|
if (joined_stereo) {
|
||||||
if (BITS_LEFT(length,gb) >= 16)
|
if (get_bits_left(gb) >= 16)
|
||||||
for (j = 0; j < 16; j++)
|
for (j = 0; j < 16; j++)
|
||||||
sign_bits[j] = get_bits1 (gb);
|
sign_bits[j] = get_bits1 (gb);
|
||||||
|
|
||||||
@ -809,14 +805,14 @@ static int synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int le
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (ch = 0; ch < channels; ch++) {
|
for (ch = 0; ch < channels; ch++) {
|
||||||
zero_encoding = (BITS_LEFT(length,gb) >= 1) ? get_bits1(gb) : 0;
|
zero_encoding = (get_bits_left(gb) >= 1) ? get_bits1(gb) : 0;
|
||||||
type34_predictor = 0.0;
|
type34_predictor = 0.0;
|
||||||
type34_first = 1;
|
type34_first = 1;
|
||||||
|
|
||||||
for (j = 0; j < 128; ) {
|
for (j = 0; j < 128; ) {
|
||||||
switch (q->coding_method[ch][sb][j / 2]) {
|
switch (q->coding_method[ch][sb][j / 2]) {
|
||||||
case 8:
|
case 8:
|
||||||
if (BITS_LEFT(length,gb) >= 10) {
|
if (get_bits_left(gb) >= 10) {
|
||||||
if (zero_encoding) {
|
if (zero_encoding) {
|
||||||
for (k = 0; k < 5; k++) {
|
for (k = 0; k < 5; k++) {
|
||||||
if ((j + 2 * k) >= 128)
|
if ((j + 2 * k) >= 128)
|
||||||
@ -838,7 +834,7 @@ static int synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int le
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 10:
|
case 10:
|
||||||
if (BITS_LEFT(length,gb) >= 1) {
|
if (get_bits_left(gb) >= 1) {
|
||||||
float f = 0.81;
|
float f = 0.81;
|
||||||
|
|
||||||
if (get_bits1(gb))
|
if (get_bits1(gb))
|
||||||
@ -852,7 +848,7 @@ static int synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int le
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 16:
|
case 16:
|
||||||
if (BITS_LEFT(length,gb) >= 10) {
|
if (get_bits_left(gb) >= 10) {
|
||||||
if (zero_encoding) {
|
if (zero_encoding) {
|
||||||
for (k = 0; k < 5; k++) {
|
for (k = 0; k < 5; k++) {
|
||||||
if ((j + k) >= 128)
|
if ((j + k) >= 128)
|
||||||
@ -872,7 +868,7 @@ static int synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int le
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 24:
|
case 24:
|
||||||
if (BITS_LEFT(length,gb) >= 7) {
|
if (get_bits_left(gb) >= 7) {
|
||||||
n = get_bits(gb, 7);
|
n = get_bits(gb, 7);
|
||||||
for (k = 0; k < 3; k++)
|
for (k = 0; k < 3; k++)
|
||||||
samples[k] = (random_dequant_type24[n][k] - 2.0) * 0.5;
|
samples[k] = (random_dequant_type24[n][k] - 2.0) * 0.5;
|
||||||
@ -884,7 +880,7 @@ static int synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int le
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 30:
|
case 30:
|
||||||
if (BITS_LEFT(length,gb) >= 4) {
|
if (get_bits_left(gb) >= 4) {
|
||||||
unsigned v = qdm2_get_vlc(gb, &vlc_tab_type30, 0, 1);
|
unsigned v = qdm2_get_vlc(gb, &vlc_tab_type30, 0, 1);
|
||||||
if (v >= FF_ARRAY_ELEMS(type30_dequant))
|
if (v >= FF_ARRAY_ELEMS(type30_dequant))
|
||||||
return AVERROR_INVALIDDATA;
|
return AVERROR_INVALIDDATA;
|
||||||
@ -896,7 +892,7 @@ static int synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int le
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 34:
|
case 34:
|
||||||
if (BITS_LEFT(length,gb) >= 7) {
|
if (get_bits_left(gb) >= 7) {
|
||||||
if (type34_first) {
|
if (type34_first) {
|
||||||
type34_div = (float)(1 << get_bits(gb, 2));
|
type34_div = (float)(1 << get_bits(gb, 2));
|
||||||
samples[0] = ((float)get_bits(gb, 5) - 16.0) / 15.0;
|
samples[0] = ((float)get_bits(gb, 5) - 16.0) / 15.0;
|
||||||
@ -953,27 +949,26 @@ static int synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int le
|
|||||||
*
|
*
|
||||||
* @param quantized_coeffs pointer to quantized_coeffs[ch][0]
|
* @param quantized_coeffs pointer to quantized_coeffs[ch][0]
|
||||||
* @param gb bitreader context
|
* @param gb bitreader context
|
||||||
* @param length packet length in bits
|
|
||||||
*/
|
*/
|
||||||
static int init_quantized_coeffs_elem0 (int8_t *quantized_coeffs, GetBitContext *gb, int length)
|
static int init_quantized_coeffs_elem0 (int8_t *quantized_coeffs, GetBitContext *gb)
|
||||||
{
|
{
|
||||||
int i, k, run, level, diff;
|
int i, k, run, level, diff;
|
||||||
|
|
||||||
if (BITS_LEFT(length,gb) < 16)
|
if (get_bits_left(gb) < 16)
|
||||||
return -1;
|
return -1;
|
||||||
level = qdm2_get_vlc(gb, &vlc_tab_level, 0, 2);
|
level = qdm2_get_vlc(gb, &vlc_tab_level, 0, 2);
|
||||||
|
|
||||||
quantized_coeffs[0] = level;
|
quantized_coeffs[0] = level;
|
||||||
|
|
||||||
for (i = 0; i < 7; ) {
|
for (i = 0; i < 7; ) {
|
||||||
if (BITS_LEFT(length,gb) < 16)
|
if (get_bits_left(gb) < 16)
|
||||||
return -1;
|
return -1;
|
||||||
run = qdm2_get_vlc(gb, &vlc_tab_run, 0, 1) + 1;
|
run = qdm2_get_vlc(gb, &vlc_tab_run, 0, 1) + 1;
|
||||||
|
|
||||||
if (i + run >= 8)
|
if (i + run >= 8)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
if (BITS_LEFT(length,gb) < 16)
|
if (get_bits_left(gb) < 16)
|
||||||
return -1;
|
return -1;
|
||||||
diff = qdm2_get_se_vlc(&vlc_tab_diff, gb, 2);
|
diff = qdm2_get_se_vlc(&vlc_tab_diff, gb, 2);
|
||||||
|
|
||||||
@ -994,16 +989,15 @@ static int init_quantized_coeffs_elem0 (int8_t *quantized_coeffs, GetBitContext
|
|||||||
*
|
*
|
||||||
* @param q context
|
* @param q context
|
||||||
* @param gb bitreader context
|
* @param gb bitreader context
|
||||||
* @param length packet length in bits
|
|
||||||
*/
|
*/
|
||||||
static void init_tone_level_dequantization (QDM2Context *q, GetBitContext *gb, int length)
|
static void init_tone_level_dequantization (QDM2Context *q, GetBitContext *gb)
|
||||||
{
|
{
|
||||||
int sb, j, k, n, ch;
|
int sb, j, k, n, ch;
|
||||||
|
|
||||||
for (ch = 0; ch < q->nb_channels; ch++) {
|
for (ch = 0; ch < q->nb_channels; ch++) {
|
||||||
init_quantized_coeffs_elem0(q->quantized_coeffs[ch][0], gb, length);
|
init_quantized_coeffs_elem0(q->quantized_coeffs[ch][0], gb);
|
||||||
|
|
||||||
if (BITS_LEFT(length,gb) < 16) {
|
if (get_bits_left(gb) < 16) {
|
||||||
memset(q->quantized_coeffs[ch][0], 0, 8);
|
memset(q->quantized_coeffs[ch][0], 0, 8);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1014,11 +1008,11 @@ static void init_tone_level_dequantization (QDM2Context *q, GetBitContext *gb, i
|
|||||||
for (sb = 0; sb < n; sb++)
|
for (sb = 0; sb < n; sb++)
|
||||||
for (ch = 0; ch < q->nb_channels; ch++)
|
for (ch = 0; ch < q->nb_channels; ch++)
|
||||||
for (j = 0; j < 8; j++) {
|
for (j = 0; j < 8; j++) {
|
||||||
if (BITS_LEFT(length,gb) < 1)
|
if (get_bits_left(gb) < 1)
|
||||||
break;
|
break;
|
||||||
if (get_bits1(gb)) {
|
if (get_bits1(gb)) {
|
||||||
for (k=0; k < 8; k++) {
|
for (k=0; k < 8; k++) {
|
||||||
if (BITS_LEFT(length,gb) < 16)
|
if (get_bits_left(gb) < 16)
|
||||||
break;
|
break;
|
||||||
q->tone_level_idx_hi1[ch][sb][j][k] = qdm2_get_vlc(gb, &vlc_tab_tone_level_idx_hi1, 0, 2);
|
q->tone_level_idx_hi1[ch][sb][j][k] = qdm2_get_vlc(gb, &vlc_tab_tone_level_idx_hi1, 0, 2);
|
||||||
}
|
}
|
||||||
@ -1032,7 +1026,7 @@ static void init_tone_level_dequantization (QDM2Context *q, GetBitContext *gb, i
|
|||||||
|
|
||||||
for (sb = 0; sb < n; sb++)
|
for (sb = 0; sb < n; sb++)
|
||||||
for (ch = 0; ch < q->nb_channels; ch++) {
|
for (ch = 0; ch < q->nb_channels; ch++) {
|
||||||
if (BITS_LEFT(length,gb) < 16)
|
if (get_bits_left(gb) < 16)
|
||||||
break;
|
break;
|
||||||
q->tone_level_idx_hi2[ch][sb] = qdm2_get_vlc(gb, &vlc_tab_tone_level_idx_hi2, 0, 2);
|
q->tone_level_idx_hi2[ch][sb] = qdm2_get_vlc(gb, &vlc_tab_tone_level_idx_hi2, 0, 2);
|
||||||
if (sb > 19)
|
if (sb > 19)
|
||||||
@ -1047,7 +1041,7 @@ static void init_tone_level_dequantization (QDM2Context *q, GetBitContext *gb, i
|
|||||||
for (sb = 0; sb < n; sb++)
|
for (sb = 0; sb < n; sb++)
|
||||||
for (ch = 0; ch < q->nb_channels; ch++)
|
for (ch = 0; ch < q->nb_channels; ch++)
|
||||||
for (j = 0; j < 8; j++) {
|
for (j = 0; j < 8; j++) {
|
||||||
if (BITS_LEFT(length,gb) < 16)
|
if (get_bits_left(gb) < 16)
|
||||||
break;
|
break;
|
||||||
q->tone_level_idx_mid[ch][sb][j] = qdm2_get_vlc(gb, &vlc_tab_tone_level_idx_mid, 0, 2) - 32;
|
q->tone_level_idx_mid[ch][sb][j] = qdm2_get_vlc(gb, &vlc_tab_tone_level_idx_mid, 0, 2) - 32;
|
||||||
}
|
}
|
||||||
@ -1103,14 +1097,13 @@ static int process_subpacket_9 (QDM2Context *q, QDM2SubPNode *node)
|
|||||||
* @param node pointer to node with packet
|
* @param node pointer to node with packet
|
||||||
* @param length packet length in bits
|
* @param length packet length in bits
|
||||||
*/
|
*/
|
||||||
static void process_subpacket_10 (QDM2Context *q, QDM2SubPNode *node, int length)
|
static void process_subpacket_10 (QDM2Context *q, QDM2SubPNode *node)
|
||||||
{
|
{
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
|
|
||||||
init_get_bits(&gb, ((node == NULL) ? empty_buffer : node->packet->data), ((node == NULL) ? 0 : node->packet->size*8));
|
if (node) {
|
||||||
|
init_get_bits(&gb, node->packet->data, node->packet->size * 8);
|
||||||
if (length != 0) {
|
init_tone_level_dequantization(q, &gb);
|
||||||
init_tone_level_dequantization(q, &gb, length);
|
|
||||||
fill_tone_level_array(q, 1);
|
fill_tone_level_array(q, 1);
|
||||||
} else {
|
} else {
|
||||||
fill_tone_level_array(q, 0);
|
fill_tone_level_array(q, 0);
|
||||||
@ -1123,13 +1116,17 @@ static void process_subpacket_10 (QDM2Context *q, QDM2SubPNode *node, int length
|
|||||||
*
|
*
|
||||||
* @param q context
|
* @param q context
|
||||||
* @param node pointer to node with packet
|
* @param node pointer to node with packet
|
||||||
* @param length packet length in bit
|
|
||||||
*/
|
*/
|
||||||
static void process_subpacket_11 (QDM2Context *q, QDM2SubPNode *node, int length)
|
static void process_subpacket_11 (QDM2Context *q, QDM2SubPNode *node)
|
||||||
{
|
{
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
|
int length = 0;
|
||||||
|
|
||||||
|
if (node) {
|
||||||
|
length = node->packet->size * 8;
|
||||||
|
init_get_bits(&gb, node->packet->data, length);
|
||||||
|
}
|
||||||
|
|
||||||
init_get_bits(&gb, ((node == NULL) ? empty_buffer : node->packet->data), ((node == NULL) ? 0 : node->packet->size*8));
|
|
||||||
if (length >= 32) {
|
if (length >= 32) {
|
||||||
int c = get_bits (&gb, 13);
|
int c = get_bits (&gb, 13);
|
||||||
|
|
||||||
@ -1149,11 +1146,16 @@ static void process_subpacket_11 (QDM2Context *q, QDM2SubPNode *node, int length
|
|||||||
* @param node pointer to node with packet
|
* @param node pointer to node with packet
|
||||||
* @param length packet length in bits
|
* @param length packet length in bits
|
||||||
*/
|
*/
|
||||||
static void process_subpacket_12 (QDM2Context *q, QDM2SubPNode *node, int length)
|
static void process_subpacket_12 (QDM2Context *q, QDM2SubPNode *node)
|
||||||
{
|
{
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
|
int length = 0;
|
||||||
|
|
||||||
|
if (node) {
|
||||||
|
length = node->packet->size * 8;
|
||||||
|
init_get_bits(&gb, node->packet->data, length);
|
||||||
|
}
|
||||||
|
|
||||||
init_get_bits(&gb, ((node == NULL) ? empty_buffer : node->packet->data), ((node == NULL) ? 0 : node->packet->size*8));
|
|
||||||
synthfilt_build_sb_samples(q, &gb, length, 8, QDM2_SB_USED(q->sub_sampling));
|
synthfilt_build_sb_samples(q, &gb, length, 8, QDM2_SB_USED(q->sub_sampling));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1173,21 +1175,21 @@ static void process_synthesis_subpackets (QDM2Context *q, QDM2SubPNode *list)
|
|||||||
|
|
||||||
nodes[1] = qdm2_search_subpacket_type_in_list(list, 10);
|
nodes[1] = qdm2_search_subpacket_type_in_list(list, 10);
|
||||||
if (nodes[1] != NULL)
|
if (nodes[1] != NULL)
|
||||||
process_subpacket_10(q, nodes[1], nodes[1]->packet->size << 3);
|
process_subpacket_10(q, nodes[1]);
|
||||||
else
|
else
|
||||||
process_subpacket_10(q, NULL, 0);
|
process_subpacket_10(q, NULL);
|
||||||
|
|
||||||
nodes[2] = qdm2_search_subpacket_type_in_list(list, 11);
|
nodes[2] = qdm2_search_subpacket_type_in_list(list, 11);
|
||||||
if (nodes[0] != NULL && nodes[1] != NULL && nodes[2] != NULL)
|
if (nodes[0] != NULL && nodes[1] != NULL && nodes[2] != NULL)
|
||||||
process_subpacket_11(q, nodes[2], (nodes[2]->packet->size << 3));
|
process_subpacket_11(q, nodes[2]);
|
||||||
else
|
else
|
||||||
process_subpacket_11(q, NULL, 0);
|
process_subpacket_11(q, NULL);
|
||||||
|
|
||||||
nodes[3] = qdm2_search_subpacket_type_in_list(list, 12);
|
nodes[3] = qdm2_search_subpacket_type_in_list(list, 12);
|
||||||
if (nodes[0] != NULL && nodes[1] != NULL && nodes[3] != NULL)
|
if (nodes[0] != NULL && nodes[1] != NULL && nodes[3] != NULL)
|
||||||
process_subpacket_12(q, nodes[3], (nodes[3]->packet->size << 3));
|
process_subpacket_12(q, nodes[3]);
|
||||||
else
|
else
|
||||||
process_subpacket_12(q, NULL, 0);
|
process_subpacket_12(q, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1309,9 +1311,9 @@ static void qdm2_decode_super_block (QDM2Context *q)
|
|||||||
process_synthesis_subpackets(q, q->sub_packet_list_D);
|
process_synthesis_subpackets(q, q->sub_packet_list_D);
|
||||||
q->do_synth_filter = 1;
|
q->do_synth_filter = 1;
|
||||||
} else if (q->do_synth_filter) {
|
} else if (q->do_synth_filter) {
|
||||||
process_subpacket_10(q, NULL, 0);
|
process_subpacket_10(q, NULL);
|
||||||
process_subpacket_11(q, NULL, 0);
|
process_subpacket_11(q, NULL);
|
||||||
process_subpacket_12(q, NULL, 0);
|
process_subpacket_12(q, NULL);
|
||||||
}
|
}
|
||||||
/* **************************************************************** */
|
/* **************************************************************** */
|
||||||
}
|
}
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \
|
VIS-OBJS += sparc/dsputil_vis.o \
|
||||||
sparc/simple_idct_vis.o \
|
sparc/simple_idct_vis.o \
|
||||||
|
@ -58,6 +58,7 @@ typedef struct TsccContext {
|
|||||||
unsigned int decomp_size;
|
unsigned int decomp_size;
|
||||||
// Decompression buffer
|
// Decompression buffer
|
||||||
unsigned char* decomp_buf;
|
unsigned char* decomp_buf;
|
||||||
|
GetByteContext gb;
|
||||||
int height;
|
int height;
|
||||||
z_stream zstream;
|
z_stream zstream;
|
||||||
|
|
||||||
@ -105,8 +106,11 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if(zret != Z_DATA_ERROR)
|
if (zret != Z_DATA_ERROR) {
|
||||||
ff_msrle_decode(avctx, (AVPicture*)&c->pic, c->bpp, c->decomp_buf, c->decomp_size - c->zstream.avail_out);
|
bytestream2_init(&c->gb, c->decomp_buf,
|
||||||
|
c->decomp_size - c->zstream.avail_out);
|
||||||
|
ff_msrle_decode(avctx, (AVPicture*)&c->pic, c->bpp, &c->gb);
|
||||||
|
}
|
||||||
|
|
||||||
/* make the palette available on the way out */
|
/* make the palette available on the way out */
|
||||||
if (c->avctx->pix_fmt == PIX_FMT_PAL8) {
|
if (c->avctx->pix_fmt == PIX_FMT_PAL8) {
|
||||||
|
@ -2,7 +2,7 @@ OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o
|
|||||||
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
|
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
|
||||||
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
|
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
|
||||||
|
|
||||||
OBJS-$(HAVE_MMX) += x86/dsputil_mmx.o \
|
MMX-OBJS += x86/dsputil_mmx.o \
|
||||||
x86/fdct_mmx.o \
|
x86/fdct_mmx.o \
|
||||||
x86/fmtconvert_mmx.o \
|
x86/fmtconvert_mmx.o \
|
||||||
x86/idct_mmx_xvid.o \
|
x86/idct_mmx_xvid.o \
|
||||||
@ -74,7 +74,6 @@ YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp.o \
|
|||||||
x86/vp56dsp.o
|
x86/vp56dsp.o
|
||||||
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
|
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
|
||||||
|
|
||||||
MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
|
YASM-OBJS += x86/dsputil_yasm.o \
|
||||||
x86/deinterlace.o \
|
x86/deinterlace.o \
|
||||||
x86/fmtconvert.o \
|
x86/fmtconvert.o \
|
||||||
$(YASM-OBJS-yes)
|
|
||||||
|
@ -136,10 +136,10 @@ cglobal put_signed_rect_clamped_%1, 5,7,3, dst, dst_stride, src, src_stride, w,
|
|||||||
and wd, ~(mmsize-1)
|
and wd, ~(mmsize-1)
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r10d, r5m
|
mov r7d, r5m
|
||||||
mov r11d, wd
|
mov r8d, wd
|
||||||
%define wspill r11d
|
%define wspill r8d
|
||||||
%define hd r10d
|
%define hd r7d
|
||||||
%else
|
%else
|
||||||
mov r4m, wd
|
mov r4m, wd
|
||||||
%define wspill r4m
|
%define wspill r4m
|
||||||
|
@ -497,9 +497,9 @@ cglobal scalarproduct_float_sse, 3,3,2, v1, v2, offset
|
|||||||
|
|
||||||
%macro EMU_EDGE_FUNC 0
|
%macro EMU_EDGE_FUNC 0
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%define w_reg r10
|
%define w_reg r7
|
||||||
cglobal emu_edge_core, 6, 7, 1
|
cglobal emu_edge_core, 6, 9, 1
|
||||||
mov r11, r5 ; save block_h
|
mov r8, r5 ; save block_h
|
||||||
%else
|
%else
|
||||||
%define w_reg r6
|
%define w_reg r6
|
||||||
cglobal emu_edge_core, 2, 7, 0
|
cglobal emu_edge_core, 2, 7, 0
|
||||||
@ -536,7 +536,7 @@ cglobal emu_edge_core, 2, 7, 0
|
|||||||
sub r0, w_reg
|
sub r0, w_reg
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r3, r0 ; backup of buf+block_h*linesize
|
mov r3, r0 ; backup of buf+block_h*linesize
|
||||||
mov r5, r11
|
mov r5, r8
|
||||||
%else
|
%else
|
||||||
mov r0m, r0 ; backup of buf+block_h*linesize
|
mov r0m, r0 ; backup of buf+block_h*linesize
|
||||||
mov r5, r5m
|
mov r5, r5m
|
||||||
@ -550,7 +550,7 @@ cglobal emu_edge_core, 2, 7, 0
|
|||||||
; FIXME we can do a if size == 1 here if that makes any speed difference, test me
|
; FIXME we can do a if size == 1 here if that makes any speed difference, test me
|
||||||
sar w_reg, 1
|
sar w_reg, 1
|
||||||
sal w_reg, 6
|
sal w_reg, 6
|
||||||
; r0=buf+block_h*linesize,r10(64)/r6(32)=start_x offset for funcs
|
; r0=buf+block_h*linesize,r7(64)/r6(32)=start_x offset for funcs
|
||||||
; r6(rax)/r3(ebx)=val,r2=linesize,r1=start_x,r5=block_h
|
; r6(rax)/r3(ebx)=val,r2=linesize,r1=start_x,r5=block_h
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea rax, [.emuedge_extend_left_2]
|
lea rax, [.emuedge_extend_left_2]
|
||||||
@ -560,7 +560,7 @@ cglobal emu_edge_core, 2, 7, 0
|
|||||||
%endif
|
%endif
|
||||||
call w_reg
|
call w_reg
|
||||||
|
|
||||||
; now r3(64)/r0(32)=buf,r2=linesize,r11/r5=block_h,r6/r3=val, r10/r6=end_x, r1=block_w
|
; now r3(64)/r0(32)=buf,r2=linesize,r8/r5=block_h,r6/r3=val, r7/r6=end_x, r1=block_w
|
||||||
.right_extend:
|
.right_extend:
|
||||||
%if ARCH_X86_32
|
%if ARCH_X86_32
|
||||||
mov r0, r0m
|
mov r0, r0m
|
||||||
@ -591,7 +591,7 @@ cglobal emu_edge_core, 2, 7, 0
|
|||||||
%define vall al
|
%define vall al
|
||||||
%define valh ah
|
%define valh ah
|
||||||
%define valw ax
|
%define valw ax
|
||||||
%define valw2 r10w
|
%define valw2 r7w
|
||||||
%define valw3 r3w
|
%define valw3 r3w
|
||||||
%if WIN64
|
%if WIN64
|
||||||
%define valw4 r4w
|
%define valw4 r4w
|
||||||
@ -618,7 +618,7 @@ cglobal emu_edge_core, 2, 7, 0
|
|||||||
; - else if (%2 & 8) fills 8 bytes into mm0
|
; - else if (%2 & 8) fills 8 bytes into mm0
|
||||||
; - if (%2 & 7 == 4) fills the last 4 bytes into rax
|
; - if (%2 & 7 == 4) fills the last 4 bytes into rax
|
||||||
; - else if (%2 & 4) fills 4 bytes into mm0-1
|
; - else if (%2 & 4) fills 4 bytes into mm0-1
|
||||||
; - if (%2 & 3 == 3) fills 2 bytes into r10/r3, and 1 into eax
|
; - if (%2 & 3 == 3) fills 2 bytes into r7/r3, and 1 into eax
|
||||||
; (note that we're using r3 for body/bottom because it's a shorter
|
; (note that we're using r3 for body/bottom because it's a shorter
|
||||||
; opcode, and then the loop fits in 128 bytes)
|
; opcode, and then the loop fits in 128 bytes)
|
||||||
; - else fills remaining bytes into rax
|
; - else fills remaining bytes into rax
|
||||||
@ -848,7 +848,7 @@ ALIGN 64
|
|||||||
%endrep
|
%endrep
|
||||||
%endmacro ; LEFT_EXTEND
|
%endmacro ; LEFT_EXTEND
|
||||||
|
|
||||||
; r3/r0=buf+block_h*linesize, r2=linesize, r11/r5=block_h, r0/r6=end_x, r6/r3=val
|
; r3/r0=buf+block_h*linesize, r2=linesize, r8/r5=block_h, r0/r6=end_x, r6/r3=val
|
||||||
%macro RIGHT_EXTEND 0
|
%macro RIGHT_EXTEND 0
|
||||||
%assign %%n 2
|
%assign %%n 2
|
||||||
%rep 11
|
%rep 11
|
||||||
@ -858,7 +858,7 @@ ALIGN 64
|
|||||||
sub r3, r2 ; dst -= linesize
|
sub r3, r2 ; dst -= linesize
|
||||||
READ_V_PIXEL %%n, [r3+w_reg-1] ; read pixels
|
READ_V_PIXEL %%n, [r3+w_reg-1] ; read pixels
|
||||||
WRITE_V_PIXEL %%n, r3+r4-%%n ; write pixels
|
WRITE_V_PIXEL %%n, r3+r4-%%n ; write pixels
|
||||||
dec r11
|
dec r8
|
||||||
%else ; ARCH_X86_32
|
%else ; ARCH_X86_32
|
||||||
sub r0, r2 ; dst -= linesize
|
sub r0, r2 ; dst -= linesize
|
||||||
READ_V_PIXEL %%n, [r0+w_reg-1] ; read pixels
|
READ_V_PIXEL %%n, [r0+w_reg-1] ; read pixels
|
||||||
@ -937,11 +937,11 @@ ALIGN 64
|
|||||||
%macro SLOW_V_EXTEND 0
|
%macro SLOW_V_EXTEND 0
|
||||||
.slow_v_extend_loop:
|
.slow_v_extend_loop:
|
||||||
; r0=buf,r1=src,r2(64)/r2m(32)=linesize,r3(64)/r3m(32)=start_x,r4=end_y,r5=block_h
|
; r0=buf,r1=src,r2(64)/r2m(32)=linesize,r3(64)/r3m(32)=start_x,r4=end_y,r5=block_h
|
||||||
; r11(64)/r3(later-64)/r2(32)=cnt_reg,r6(64)/r3(32)=val_reg,r10(64)/r6(32)=w=end_x-start_x
|
; r8(64)/r3(later-64)/r2(32)=cnt_reg,r6(64)/r3(32)=val_reg,r7(64)/r6(32)=w=end_x-start_x
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
push r11 ; save old value of block_h
|
push r8 ; save old value of block_h
|
||||||
test r3, r3
|
test r3, r3
|
||||||
%define cnt_reg r11
|
%define cnt_reg r8
|
||||||
jz .do_body_copy ; if (!start_y) goto do_body_copy
|
jz .do_body_copy ; if (!start_y) goto do_body_copy
|
||||||
V_COPY_ROW top, r3
|
V_COPY_ROW top, r3
|
||||||
%else
|
%else
|
||||||
@ -955,7 +955,7 @@ ALIGN 64
|
|||||||
V_COPY_ROW body, r4
|
V_COPY_ROW body, r4
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
pop r11 ; restore old value of block_h
|
pop r8 ; restore old value of block_h
|
||||||
%define cnt_reg r3
|
%define cnt_reg r3
|
||||||
%endif
|
%endif
|
||||||
test r5, r5
|
test r5, r5
|
||||||
@ -974,7 +974,7 @@ ALIGN 64
|
|||||||
|
|
||||||
%macro SLOW_LEFT_EXTEND 0
|
%macro SLOW_LEFT_EXTEND 0
|
||||||
.slow_left_extend_loop:
|
.slow_left_extend_loop:
|
||||||
; r0=buf+block_h*linesize,r2=linesize,r6(64)/r3(32)=val,r5=block_h,r4=cntr,r10/r6=start_x
|
; r0=buf+block_h*linesize,r2=linesize,r6(64)/r3(32)=val,r5=block_h,r4=cntr,r7/r6=start_x
|
||||||
mov r4, 8
|
mov r4, 8
|
||||||
sub r0, linesize
|
sub r0, linesize
|
||||||
READ_V_PIXEL 8, [r0+w_reg]
|
READ_V_PIXEL 8, [r0+w_reg]
|
||||||
@ -1002,11 +1002,11 @@ ALIGN 64
|
|||||||
|
|
||||||
%macro SLOW_RIGHT_EXTEND 0
|
%macro SLOW_RIGHT_EXTEND 0
|
||||||
.slow_right_extend_loop:
|
.slow_right_extend_loop:
|
||||||
; r3(64)/r0(32)=buf+block_h*linesize,r2=linesize,r4=block_w,r11(64)/r5(32)=block_h,
|
; r3(64)/r0(32)=buf+block_h*linesize,r2=linesize,r4=block_w,r8(64)/r5(32)=block_h,
|
||||||
; r10(64)/r6(32)=end_x,r6/r3=val,r1=cntr
|
; r7(64)/r6(32)=end_x,r6/r3=val,r1=cntr
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%define buf_reg r3
|
%define buf_reg r3
|
||||||
%define bh_reg r11
|
%define bh_reg r8
|
||||||
%else
|
%else
|
||||||
%define buf_reg r0
|
%define buf_reg r0
|
||||||
%define bh_reg r5
|
%define bh_reg r5
|
||||||
|
@ -750,14 +750,11 @@ INIT_XMM
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro DECL_IMDCT 2
|
%macro DECL_IMDCT 2
|
||||||
cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input
|
cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *input
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%define rrevtab r10
|
%define rrevtab r7
|
||||||
%define rtcos r11
|
%define rtcos r8
|
||||||
%define rtsin r12
|
%define rtsin r9
|
||||||
push r12
|
|
||||||
push r13
|
|
||||||
push r14
|
|
||||||
%else
|
%else
|
||||||
%define rrevtab r6
|
%define rrevtab r6
|
||||||
%define rtsin r6
|
%define rtsin r6
|
||||||
@ -799,12 +796,12 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *
|
|||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
movzx r5, word [rrevtab+r4-4]
|
movzx r5, word [rrevtab+r4-4]
|
||||||
movzx r6, word [rrevtab+r4-2]
|
movzx r6, word [rrevtab+r4-2]
|
||||||
movzx r13, word [rrevtab+r3]
|
movzx r10, word [rrevtab+r3]
|
||||||
movzx r14, word [rrevtab+r3+2]
|
movzx r11, word [rrevtab+r3+2]
|
||||||
movlps [r1+r5 *8], xmm0
|
movlps [r1+r5 *8], xmm0
|
||||||
movhps [r1+r6 *8], xmm0
|
movhps [r1+r6 *8], xmm0
|
||||||
movlps [r1+r13*8], xmm1
|
movlps [r1+r10*8], xmm1
|
||||||
movhps [r1+r14*8], xmm1
|
movhps [r1+r11*8], xmm1
|
||||||
add r4, 4
|
add r4, 4
|
||||||
%else
|
%else
|
||||||
mov r6, [esp]
|
mov r6, [esp]
|
||||||
@ -840,11 +837,7 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *
|
|||||||
mov r1, -mmsize
|
mov r1, -mmsize
|
||||||
sub r1, r0
|
sub r1, r0
|
||||||
%2 r0, r1, r6, rtcos, rtsin
|
%2 r0, r1, r6, rtcos, rtsin
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64 == 0
|
||||||
pop r14
|
|
||||||
pop r13
|
|
||||||
pop r12
|
|
||||||
%else
|
|
||||||
add esp, 12
|
add esp, 12
|
||||||
%endif
|
%endif
|
||||||
%ifidn avx_enabled, 1
|
%ifidn avx_enabled, 1
|
||||||
|
@ -179,9 +179,8 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
|
|||||||
|
|
||||||
%macro FLOAT_TO_INT16_INTERLEAVE6 1
|
%macro FLOAT_TO_INT16_INTERLEAVE6 1
|
||||||
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
|
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
|
||||||
cglobal float_to_int16_interleave6_%1, 2,7,0, dst, src, src1, src2, src3, src4, src5
|
cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, src5, len
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%define lend r10d
|
|
||||||
mov lend, r2d
|
mov lend, r2d
|
||||||
%else
|
%else
|
||||||
%define lend dword r2m
|
%define lend dword r2m
|
||||||
@ -240,9 +239,8 @@ FLOAT_TO_INT16_INTERLEAVE6 3dn2
|
|||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
|
||||||
%macro FLOAT_INTERLEAVE6 2
|
%macro FLOAT_INTERLEAVE6 2
|
||||||
cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5
|
cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, len
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%define lend r10d
|
|
||||||
mov lend, r2d
|
mov lend, r2d
|
||||||
%else
|
%else
|
||||||
%define lend dword r2m
|
%define lend dword r2m
|
||||||
|
@ -91,9 +91,22 @@ SECTION .text
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro chroma_mc8_mmx_func 3
|
%macro chroma_mc8_mmx_func 3
|
||||||
|
%ifidn %2, rv40
|
||||||
|
%ifdef PIC
|
||||||
|
%define rnd_1d_rv40 r8
|
||||||
|
%define rnd_2d_rv40 r8
|
||||||
|
%define extra_regs 2
|
||||||
|
%else ; no-PIC
|
||||||
|
%define rnd_1d_rv40 rnd_rv40_1d_tbl
|
||||||
|
%define rnd_2d_rv40 rnd_rv40_2d_tbl
|
||||||
|
%define extra_regs 1
|
||||||
|
%endif ; PIC
|
||||||
|
%else
|
||||||
|
%define extra_regs 0
|
||||||
|
%endif ; rv40
|
||||||
; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
|
; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
|
||||||
; int stride, int h, int mx, int my)
|
; int stride, int h, int mx, int my)
|
||||||
cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
|
cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
movsxd r2, r2d
|
movsxd r2, r2d
|
||||||
%endif
|
%endif
|
||||||
@ -106,19 +119,12 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
|
|||||||
|
|
||||||
.at_least_one_non_zero
|
.at_least_one_non_zero
|
||||||
%ifidn %2, rv40
|
%ifidn %2, rv40
|
||||||
%ifdef PIC
|
|
||||||
%define rnd_1d_rv40 r11
|
|
||||||
%define rnd_2d_rv40 r11
|
|
||||||
%else ; no-PIC
|
|
||||||
%define rnd_1d_rv40 rnd_rv40_1d_tbl
|
|
||||||
%define rnd_2d_rv40 rnd_rv40_2d_tbl
|
|
||||||
%endif
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r10, r5
|
mov r7, r5
|
||||||
and r10, 6 ; &~1 for mx/my=[0,7]
|
and r7, 6 ; &~1 for mx/my=[0,7]
|
||||||
lea r10, [r10*4+r4]
|
lea r7, [r7*4+r4]
|
||||||
sar r10d, 1
|
sar r7d, 1
|
||||||
%define rnd_bias r10
|
%define rnd_bias r7
|
||||||
%define dest_reg r0
|
%define dest_reg r0
|
||||||
%else ; x86-32
|
%else ; x86-32
|
||||||
mov r0, r5
|
mov r0, r5
|
||||||
@ -145,7 +151,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
|
|||||||
|
|
||||||
%ifidn %2, rv40
|
%ifidn %2, rv40
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [rnd_rv40_1d_tbl]
|
lea r8, [rnd_rv40_1d_tbl]
|
||||||
%endif
|
%endif
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
mov r5, r0m
|
mov r5, r0m
|
||||||
@ -196,7 +202,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
|
|||||||
movd m6, r5d ; y
|
movd m6, r5d ; y
|
||||||
%ifidn %2, rv40
|
%ifidn %2, rv40
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [rnd_rv40_2d_tbl]
|
lea r8, [rnd_rv40_2d_tbl]
|
||||||
%endif
|
%endif
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
mov r5, r0m
|
mov r5, r0m
|
||||||
@ -278,7 +284,13 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro chroma_mc4_mmx_func 3
|
%macro chroma_mc4_mmx_func 3
|
||||||
cglobal %1_%2_chroma_mc4_%3, 6, 6, 0
|
%define extra_regs 0
|
||||||
|
%ifidn %2, rv40
|
||||||
|
%ifdef PIC
|
||||||
|
%define extra_regs 1
|
||||||
|
%endif ; PIC
|
||||||
|
%endif ; rv40
|
||||||
|
cglobal %1_%2_chroma_mc4_%3, 6, 6 + extra_regs, 0
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
movsxd r2, r2d
|
movsxd r2, r2d
|
||||||
%endif
|
%endif
|
||||||
@ -296,8 +308,8 @@ cglobal %1_%2_chroma_mc4_%3, 6, 6, 0
|
|||||||
|
|
||||||
%ifidn %2, rv40
|
%ifidn %2, rv40
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [rnd_rv40_2d_tbl]
|
lea r6, [rnd_rv40_2d_tbl]
|
||||||
%define rnd_2d_rv40 r11
|
%define rnd_2d_rv40 r6
|
||||||
%else
|
%else
|
||||||
%define rnd_2d_rv40 rnd_rv40_2d_tbl
|
%define rnd_2d_rv40 rnd_rv40_2d_tbl
|
||||||
%endif
|
%endif
|
||||||
|
@ -328,11 +328,11 @@ cglobal deblock_v_luma_8_%1, 5,5,10
|
|||||||
; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
|
; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
INIT_MMX
|
INIT_MMX
|
||||||
cglobal deblock_h_luma_8_%1, 5,7
|
cglobal deblock_h_luma_8_%1, 5,9
|
||||||
movsxd r10, r1d
|
movsxd r7, r1d
|
||||||
lea r11, [r10+r10*2]
|
lea r8, [r7+r7*2]
|
||||||
lea r6, [r0-4]
|
lea r6, [r0-4]
|
||||||
lea r5, [r0-4+r11]
|
lea r5, [r0-4+r8]
|
||||||
%if WIN64
|
%if WIN64
|
||||||
sub rsp, 0x98
|
sub rsp, 0x98
|
||||||
%define pix_tmp rsp+0x30
|
%define pix_tmp rsp+0x30
|
||||||
@ -342,14 +342,14 @@ cglobal deblock_h_luma_8_%1, 5,7
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
; transpose 6x16 -> tmp space
|
; transpose 6x16 -> tmp space
|
||||||
TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r10, r11), pix_tmp
|
TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r7, r8), pix_tmp
|
||||||
lea r6, [r6+r10*8]
|
lea r6, [r6+r7*8]
|
||||||
lea r5, [r5+r10*8]
|
lea r5, [r5+r7*8]
|
||||||
TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r10, r11), pix_tmp+8
|
TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r7, r8), pix_tmp+8
|
||||||
|
|
||||||
; vertical filter
|
; vertical filter
|
||||||
; alpha, beta, tc0 are still in r2d, r3d, r4
|
; alpha, beta, tc0 are still in r2d, r3d, r4
|
||||||
; don't backup r6, r5, r10, r11 because deblock_v_luma_sse2 doesn't use them
|
; don't backup r6, r5, r7, r8 because deblock_v_luma_sse2 doesn't use them
|
||||||
lea r0, [pix_tmp+0x30]
|
lea r0, [pix_tmp+0x30]
|
||||||
mov r1d, 0x10
|
mov r1d, 0x10
|
||||||
%if WIN64
|
%if WIN64
|
||||||
@ -364,17 +364,17 @@ cglobal deblock_h_luma_8_%1, 5,7
|
|||||||
movq m1, [pix_tmp+0x28]
|
movq m1, [pix_tmp+0x28]
|
||||||
movq m2, [pix_tmp+0x38]
|
movq m2, [pix_tmp+0x38]
|
||||||
movq m3, [pix_tmp+0x48]
|
movq m3, [pix_tmp+0x48]
|
||||||
TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r10, r11)
|
TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r7, r8)
|
||||||
|
|
||||||
shl r10, 3
|
shl r7, 3
|
||||||
sub r6, r10
|
sub r6, r7
|
||||||
sub r5, r10
|
sub r5, r7
|
||||||
shr r10, 3
|
shr r7, 3
|
||||||
movq m0, [pix_tmp+0x10]
|
movq m0, [pix_tmp+0x10]
|
||||||
movq m1, [pix_tmp+0x20]
|
movq m1, [pix_tmp+0x20]
|
||||||
movq m2, [pix_tmp+0x30]
|
movq m2, [pix_tmp+0x30]
|
||||||
movq m3, [pix_tmp+0x40]
|
movq m3, [pix_tmp+0x40]
|
||||||
TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r10, r11)
|
TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r7, r8)
|
||||||
|
|
||||||
%if WIN64
|
%if WIN64
|
||||||
add rsp, 0x98
|
add rsp, 0x98
|
||||||
@ -709,32 +709,32 @@ INIT_MMX
|
|||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
|
; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
cglobal deblock_h_luma_intra_8_%1, 4,7
|
cglobal deblock_h_luma_intra_8_%1, 4,9
|
||||||
movsxd r10, r1d
|
movsxd r7, r1d
|
||||||
lea r11, [r10*3]
|
lea r8, [r7*3]
|
||||||
lea r6, [r0-4]
|
lea r6, [r0-4]
|
||||||
lea r5, [r0-4+r11]
|
lea r5, [r0-4+r8]
|
||||||
sub rsp, 0x88
|
sub rsp, 0x88
|
||||||
%define pix_tmp rsp
|
%define pix_tmp rsp
|
||||||
|
|
||||||
; transpose 8x16 -> tmp space
|
; transpose 8x16 -> tmp space
|
||||||
TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r10, r11), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
|
TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
|
||||||
lea r6, [r6+r10*8]
|
lea r6, [r6+r7*8]
|
||||||
lea r5, [r5+r10*8]
|
lea r5, [r5+r7*8]
|
||||||
TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r10, r11), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30)
|
TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30)
|
||||||
|
|
||||||
lea r0, [pix_tmp+0x40]
|
lea r0, [pix_tmp+0x40]
|
||||||
mov r1, 0x10
|
mov r1, 0x10
|
||||||
call deblock_v_luma_intra_8_%1
|
call deblock_v_luma_intra_8_%1
|
||||||
|
|
||||||
; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8)
|
; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8)
|
||||||
lea r5, [r6+r11]
|
lea r5, [r6+r8]
|
||||||
TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r6, r5, r10, r11)
|
TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r6, r5, r7, r8)
|
||||||
shl r10, 3
|
shl r7, 3
|
||||||
sub r6, r10
|
sub r6, r7
|
||||||
sub r5, r10
|
sub r5, r7
|
||||||
shr r10, 3
|
shr r7, 3
|
||||||
TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30), PASS8ROWS(r6, r5, r10, r11)
|
TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30), PASS8ROWS(r6, r5, r7, r8)
|
||||||
add rsp, 0x88
|
add rsp, 0x88
|
||||||
RET
|
RET
|
||||||
%else
|
%else
|
||||||
|
@ -45,8 +45,10 @@ scan8_mem: db 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
|
|||||||
db 4+13*8, 5+13*8, 4+14*8, 5+14*8
|
db 4+13*8, 5+13*8, 4+14*8, 5+14*8
|
||||||
db 6+13*8, 7+13*8, 6+14*8, 7+14*8
|
db 6+13*8, 7+13*8, 6+14*8, 7+14*8
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
%define scan8 r11
|
%define npicregs 1
|
||||||
|
%define scan8 picregq
|
||||||
%else
|
%else
|
||||||
|
%define npicregs 0
|
||||||
%define scan8 scan8_mem
|
%define scan8 scan8_mem
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -301,10 +303,10 @@ cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0
|
|||||||
|
|
||||||
; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
|
; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct_add16_8_mmx, 5, 7, 0
|
cglobal h264_idct_add16_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg
|
||||||
xor r5, r5
|
xor r5, r5
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [scan8_mem]
|
lea picregq, [scan8_mem]
|
||||||
%endif
|
%endif
|
||||||
.nextblock
|
.nextblock
|
||||||
movzx r6, byte [scan8+r5]
|
movzx r6, byte [scan8+r5]
|
||||||
@ -323,13 +325,13 @@ cglobal h264_idct_add16_8_mmx, 5, 7, 0
|
|||||||
|
|
||||||
; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
|
; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct8_add4_8_mmx, 5, 7, 0
|
cglobal h264_idct8_add4_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg
|
||||||
%assign pad 128+4-(stack_offset&7)
|
%assign pad 128+4-(stack_offset&7)
|
||||||
SUB rsp, pad
|
SUB rsp, pad
|
||||||
|
|
||||||
xor r5, r5
|
xor r5, r5
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [scan8_mem]
|
lea picregq, [scan8_mem]
|
||||||
%endif
|
%endif
|
||||||
.nextblock
|
.nextblock
|
||||||
movzx r6, byte [scan8+r5]
|
movzx r6, byte [scan8+r5]
|
||||||
@ -355,10 +357,10 @@ cglobal h264_idct8_add4_8_mmx, 5, 7, 0
|
|||||||
|
|
||||||
; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
|
; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct_add16_8_mmx2, 5, 7, 0
|
cglobal h264_idct_add16_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
|
||||||
xor r5, r5
|
xor r5, r5
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [scan8_mem]
|
lea picregq, [scan8_mem]
|
||||||
%endif
|
%endif
|
||||||
.nextblock
|
.nextblock
|
||||||
movzx r6, byte [scan8+r5]
|
movzx r6, byte [scan8+r5]
|
||||||
@ -371,16 +373,13 @@ cglobal h264_idct_add16_8_mmx2, 5, 7, 0
|
|||||||
test r6, r6
|
test r6, r6
|
||||||
jz .no_dc
|
jz .no_dc
|
||||||
DC_ADD_MMX2_INIT r2, r3, r6
|
DC_ADD_MMX2_INIT r2, r3, r6
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64 == 0
|
||||||
%define dst_reg r10
|
%define dst2q r1
|
||||||
%define dst_regd r10d
|
%define dst2d r1d
|
||||||
%else
|
|
||||||
%define dst_reg r1
|
|
||||||
%define dst_regd r1d
|
|
||||||
%endif
|
%endif
|
||||||
mov dst_regd, dword [r1+r5*4]
|
mov dst2d, dword [r1+r5*4]
|
||||||
lea dst_reg, [r0+dst_reg]
|
lea dst2q, [r0+dst2q]
|
||||||
DC_ADD_MMX2_OP movh, dst_reg, r3, r6
|
DC_ADD_MMX2_OP movh, dst2q, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
mov r1, r1m
|
mov r1, r1m
|
||||||
%endif
|
%endif
|
||||||
@ -402,10 +401,10 @@ cglobal h264_idct_add16_8_mmx2, 5, 7, 0
|
|||||||
|
|
||||||
; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
|
; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct_add16intra_8_mmx, 5, 7, 0
|
cglobal h264_idct_add16intra_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg
|
||||||
xor r5, r5
|
xor r5, r5
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [scan8_mem]
|
lea picregq, [scan8_mem]
|
||||||
%endif
|
%endif
|
||||||
.nextblock
|
.nextblock
|
||||||
movzx r6, byte [scan8+r5]
|
movzx r6, byte [scan8+r5]
|
||||||
@ -425,10 +424,10 @@ cglobal h264_idct_add16intra_8_mmx, 5, 7, 0
|
|||||||
|
|
||||||
; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
|
; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
|
cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
|
||||||
xor r5, r5
|
xor r5, r5
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [scan8_mem]
|
lea picregq, [scan8_mem]
|
||||||
%endif
|
%endif
|
||||||
.nextblock
|
.nextblock
|
||||||
movzx r6, byte [scan8+r5]
|
movzx r6, byte [scan8+r5]
|
||||||
@ -448,16 +447,13 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
|
|||||||
test r6, r6
|
test r6, r6
|
||||||
jz .skipblock
|
jz .skipblock
|
||||||
DC_ADD_MMX2_INIT r2, r3, r6
|
DC_ADD_MMX2_INIT r2, r3, r6
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64 == 0
|
||||||
%define dst_reg r10
|
%define dst2q r1
|
||||||
%define dst_regd r10d
|
%define dst2d r1d
|
||||||
%else
|
|
||||||
%define dst_reg r1
|
|
||||||
%define dst_regd r1d
|
|
||||||
%endif
|
%endif
|
||||||
mov dst_regd, dword [r1+r5*4]
|
mov dst2d, dword [r1+r5*4]
|
||||||
add dst_reg, r0
|
add dst2q, r0
|
||||||
DC_ADD_MMX2_OP movh, dst_reg, r3, r6
|
DC_ADD_MMX2_OP movh, dst2q, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
mov r1, r1m
|
mov r1, r1m
|
||||||
%endif
|
%endif
|
||||||
@ -470,13 +466,13 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
|
|||||||
|
|
||||||
; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
|
; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
|
cglobal h264_idct8_add4_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
|
||||||
%assign pad 128+4-(stack_offset&7)
|
%assign pad 128+4-(stack_offset&7)
|
||||||
SUB rsp, pad
|
SUB rsp, pad
|
||||||
|
|
||||||
xor r5, r5
|
xor r5, r5
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [scan8_mem]
|
lea picregq, [scan8_mem]
|
||||||
%endif
|
%endif
|
||||||
.nextblock
|
.nextblock
|
||||||
movzx r6, byte [scan8+r5]
|
movzx r6, byte [scan8+r5]
|
||||||
@ -489,18 +485,15 @@ cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
|
|||||||
test r6, r6
|
test r6, r6
|
||||||
jz .no_dc
|
jz .no_dc
|
||||||
DC_ADD_MMX2_INIT r2, r3, r6
|
DC_ADD_MMX2_INIT r2, r3, r6
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64 == 0
|
||||||
%define dst_reg r10
|
%define dst2q r1
|
||||||
%define dst_regd r10d
|
%define dst2d r1d
|
||||||
%else
|
|
||||||
%define dst_reg r1
|
|
||||||
%define dst_regd r1d
|
|
||||||
%endif
|
%endif
|
||||||
mov dst_regd, dword [r1+r5*4]
|
mov dst2d, dword [r1+r5*4]
|
||||||
lea dst_reg, [r0+dst_reg]
|
lea dst2q, [r0+dst2q]
|
||||||
DC_ADD_MMX2_OP mova, dst_reg, r3, r6
|
DC_ADD_MMX2_OP mova, dst2q, r3, r6
|
||||||
lea dst_reg, [dst_reg+r3*4]
|
lea dst2q, [dst2q+r3*4]
|
||||||
DC_ADD_MMX2_OP mova, dst_reg, r3, r6
|
DC_ADD_MMX2_OP mova, dst2q, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
mov r1, r1m
|
mov r1, r1m
|
||||||
%endif
|
%endif
|
||||||
@ -533,10 +526,10 @@ cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
|
|||||||
INIT_XMM
|
INIT_XMM
|
||||||
; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
|
; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct8_add4_8_sse2, 5, 7, 10
|
cglobal h264_idct8_add4_8_sse2, 5, 8 + npicregs, 10, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
|
||||||
xor r5, r5
|
xor r5, r5
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [scan8_mem]
|
lea picregq, [scan8_mem]
|
||||||
%endif
|
%endif
|
||||||
.nextblock
|
.nextblock
|
||||||
movzx r6, byte [scan8+r5]
|
movzx r6, byte [scan8+r5]
|
||||||
@ -550,18 +543,15 @@ cglobal h264_idct8_add4_8_sse2, 5, 7, 10
|
|||||||
jz .no_dc
|
jz .no_dc
|
||||||
INIT_MMX
|
INIT_MMX
|
||||||
DC_ADD_MMX2_INIT r2, r3, r6
|
DC_ADD_MMX2_INIT r2, r3, r6
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64 == 0
|
||||||
%define dst_reg r10
|
%define dst2q r1
|
||||||
%define dst_regd r10d
|
%define dst2d r1d
|
||||||
%else
|
|
||||||
%define dst_reg r1
|
|
||||||
%define dst_regd r1d
|
|
||||||
%endif
|
%endif
|
||||||
mov dst_regd, dword [r1+r5*4]
|
mov dst2d, dword [r1+r5*4]
|
||||||
add dst_reg, r0
|
add dst2q, r0
|
||||||
DC_ADD_MMX2_OP mova, dst_reg, r3, r6
|
DC_ADD_MMX2_OP mova, dst2q, r3, r6
|
||||||
lea dst_reg, [dst_reg+r3*4]
|
lea dst2q, [dst2q+r3*4]
|
||||||
DC_ADD_MMX2_OP mova, dst_reg, r3, r6
|
DC_ADD_MMX2_OP mova, dst2q, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
mov r1, r1m
|
mov r1, r1m
|
||||||
%endif
|
%endif
|
||||||
@ -572,9 +562,9 @@ INIT_MMX
|
|||||||
REP_RET
|
REP_RET
|
||||||
.no_dc
|
.no_dc
|
||||||
INIT_XMM
|
INIT_XMM
|
||||||
mov dst_regd, dword [r1+r5*4]
|
mov dst2d, dword [r1+r5*4]
|
||||||
add dst_reg, r0
|
add dst2q, r0
|
||||||
IDCT8_ADD_SSE dst_reg, r2, r3, r6
|
IDCT8_ADD_SSE dst2q, r2, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
mov r1, r1m
|
mov r1, r1m
|
||||||
%endif
|
%endif
|
||||||
@ -595,7 +585,7 @@ h264_idct_add8_mmx_plane:
|
|||||||
jz .skipblock
|
jz .skipblock
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r0d, dword [r1+r5*4]
|
mov r0d, dword [r1+r5*4]
|
||||||
add r0, [r10]
|
add r0, [dst2q]
|
||||||
%else
|
%else
|
||||||
mov r0, r1m ; XXX r1m here is actually r0m of the calling func
|
mov r0, r1m ; XXX r1m here is actually r0m of the calling func
|
||||||
mov r0, [r0]
|
mov r0, [r0]
|
||||||
@ -611,20 +601,20 @@ h264_idct_add8_mmx_plane:
|
|||||||
|
|
||||||
; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
|
; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct_add8_8_mmx, 5, 7, 0
|
cglobal h264_idct_add8_8_mmx, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
|
||||||
mov r5, 16
|
mov r5, 16
|
||||||
add r2, 512
|
add r2, 512
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [scan8_mem]
|
lea picregq, [scan8_mem]
|
||||||
%endif
|
%endif
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r10, r0
|
mov dst2q, r0
|
||||||
%endif
|
%endif
|
||||||
call h264_idct_add8_mmx_plane
|
call h264_idct_add8_mmx_plane
|
||||||
mov r5, 32
|
mov r5, 32
|
||||||
add r2, 384
|
add r2, 384
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
add r10, gprsize
|
add dst2q, gprsize
|
||||||
%else
|
%else
|
||||||
add r0mp, gprsize
|
add r0mp, gprsize
|
||||||
%endif
|
%endif
|
||||||
@ -639,7 +629,7 @@ h264_idct_add8_mmx2_plane
|
|||||||
jz .try_dc
|
jz .try_dc
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r0d, dword [r1+r5*4]
|
mov r0d, dword [r1+r5*4]
|
||||||
add r0, [r10]
|
add r0, [dst2q]
|
||||||
%else
|
%else
|
||||||
mov r0, r1m ; XXX r1m here is actually r0m of the calling func
|
mov r0, r1m ; XXX r1m here is actually r0m of the calling func
|
||||||
mov r0, [r0]
|
mov r0, [r0]
|
||||||
@ -658,7 +648,7 @@ h264_idct_add8_mmx2_plane
|
|||||||
DC_ADD_MMX2_INIT r2, r3, r6
|
DC_ADD_MMX2_INIT r2, r3, r6
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r0d, dword [r1+r5*4]
|
mov r0d, dword [r1+r5*4]
|
||||||
add r0, [r10]
|
add r0, [dst2q]
|
||||||
%else
|
%else
|
||||||
mov r0, r1m ; XXX r1m here is actually r0m of the calling func
|
mov r0, r1m ; XXX r1m here is actually r0m of the calling func
|
||||||
mov r0, [r0]
|
mov r0, [r0]
|
||||||
@ -674,20 +664,20 @@ h264_idct_add8_mmx2_plane
|
|||||||
|
|
||||||
; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
|
; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct_add8_8_mmx2, 5, 7, 0
|
cglobal h264_idct_add8_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
|
||||||
mov r5, 16
|
mov r5, 16
|
||||||
add r2, 512
|
add r2, 512
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r10, r0
|
mov dst2q, r0
|
||||||
%endif
|
%endif
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [scan8_mem]
|
lea picregq, [scan8_mem]
|
||||||
%endif
|
%endif
|
||||||
call h264_idct_add8_mmx2_plane
|
call h264_idct_add8_mmx2_plane
|
||||||
mov r5, 32
|
mov r5, 32
|
||||||
add r2, 384
|
add r2, 384
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
add r10, gprsize
|
add dst2q, gprsize
|
||||||
%else
|
%else
|
||||||
add r0mp, gprsize
|
add r0mp, gprsize
|
||||||
%endif
|
%endif
|
||||||
@ -739,7 +729,7 @@ x264_add8x4_idct_sse2:
|
|||||||
jz .cycle%1end
|
jz .cycle%1end
|
||||||
mov r0d, dword [r1+%1*8]
|
mov r0d, dword [r1+%1*8]
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
add r0, r10
|
add r0, r5
|
||||||
%else
|
%else
|
||||||
add r0, r0m
|
add r0, r0m
|
||||||
%endif
|
%endif
|
||||||
@ -752,9 +742,9 @@ x264_add8x4_idct_sse2:
|
|||||||
|
|
||||||
; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
|
; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct_add16_8_sse2, 5, 5, 8
|
cglobal h264_idct_add16_8_sse2, 5, 5 + ARCH_X86_64, 8
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r10, r0
|
mov r5, r0
|
||||||
%endif
|
%endif
|
||||||
; unrolling of the loop leads to an average performance gain of
|
; unrolling of the loop leads to an average performance gain of
|
||||||
; 20-25%
|
; 20-25%
|
||||||
@ -774,7 +764,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8
|
|||||||
jz .try%1dc
|
jz .try%1dc
|
||||||
mov r0d, dword [r1+%1*8]
|
mov r0d, dword [r1+%1*8]
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
add r0, r10
|
add r0, r7
|
||||||
%else
|
%else
|
||||||
add r0, r0m
|
add r0, r0m
|
||||||
%endif
|
%endif
|
||||||
@ -786,7 +776,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8
|
|||||||
jz .cycle%1end
|
jz .cycle%1end
|
||||||
mov r0d, dword [r1+%1*8]
|
mov r0d, dword [r1+%1*8]
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
add r0, r10
|
add r0, r7
|
||||||
%else
|
%else
|
||||||
add r0, r0m
|
add r0, r0m
|
||||||
%endif
|
%endif
|
||||||
@ -799,9 +789,9 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8
|
|||||||
|
|
||||||
; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
|
; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
|
cglobal h264_idct_add16intra_8_sse2, 5, 7 + ARCH_X86_64, 8
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r10, r0
|
mov r7, r0
|
||||||
%endif
|
%endif
|
||||||
add16intra_sse2_cycle 0, 0xc
|
add16intra_sse2_cycle 0, 0xc
|
||||||
add16intra_sse2_cycle 1, 0x14
|
add16intra_sse2_cycle 1, 0x14
|
||||||
@ -819,7 +809,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
|
|||||||
jz .try%1dc
|
jz .try%1dc
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
|
mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
|
||||||
add r0, [r10]
|
add r0, [r7]
|
||||||
%else
|
%else
|
||||||
mov r0, r0m
|
mov r0, r0m
|
||||||
mov r0, [r0]
|
mov r0, [r0]
|
||||||
@ -833,7 +823,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
|
|||||||
jz .cycle%1end
|
jz .cycle%1end
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
|
mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
|
||||||
add r0, [r10]
|
add r0, [r7]
|
||||||
%else
|
%else
|
||||||
mov r0, r0m
|
mov r0, r0m
|
||||||
mov r0, [r0]
|
mov r0, [r0]
|
||||||
@ -850,15 +840,15 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
|
|||||||
|
|
||||||
; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
|
; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
|
||||||
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
|
||||||
cglobal h264_idct_add8_8_sse2, 5, 7, 8
|
cglobal h264_idct_add8_8_sse2, 5, 7 + ARCH_X86_64, 8
|
||||||
add r2, 512
|
add r2, 512
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r10, r0
|
mov r7, r0
|
||||||
%endif
|
%endif
|
||||||
add8_sse2_cycle 0, 0x34
|
add8_sse2_cycle 0, 0x34
|
||||||
add8_sse2_cycle 1, 0x3c
|
add8_sse2_cycle 1, 0x3c
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
add r10, gprsize
|
add r7, gprsize
|
||||||
%else
|
%else
|
||||||
add r0mp, gprsize
|
add r0mp, gprsize
|
||||||
%endif
|
%endif
|
||||||
|
@ -29,24 +29,6 @@ SECTION_RODATA
|
|||||||
|
|
||||||
pw_pixel_max: times 8 dw ((1 << 10)-1)
|
pw_pixel_max: times 8 dw ((1 << 10)-1)
|
||||||
pd_32: times 4 dd 32
|
pd_32: times 4 dd 32
|
||||||
scan8_mem: db 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
|
|
||||||
db 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
|
|
||||||
db 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
|
|
||||||
db 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
|
|
||||||
db 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
|
|
||||||
db 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
|
|
||||||
db 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
|
|
||||||
db 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
|
|
||||||
db 4+11*8, 5+11*8, 4+12*8, 5+12*8
|
|
||||||
db 6+11*8, 7+11*8, 6+12*8, 7+12*8
|
|
||||||
db 4+13*8, 5+13*8, 4+14*8, 5+14*8
|
|
||||||
db 6+13*8, 7+13*8, 6+14*8, 7+14*8
|
|
||||||
|
|
||||||
%ifdef PIC
|
|
||||||
%define scan8 r11
|
|
||||||
%else
|
|
||||||
%define scan8 scan8_mem
|
|
||||||
%endif
|
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
@ -315,9 +297,9 @@ IDCT_ADD16INTRA_10 avx
|
|||||||
; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
|
; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
%macro IDCT_ADD8 1
|
%macro IDCT_ADD8 1
|
||||||
cglobal h264_idct_add8_10_%1,5,7,7
|
cglobal h264_idct_add8_10_%1,5,8,7
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r10, r0
|
mov r7, r0
|
||||||
%endif
|
%endif
|
||||||
add r2, 1024
|
add r2, 1024
|
||||||
mov r0, [r0]
|
mov r0, [r0]
|
||||||
@ -325,7 +307,7 @@ cglobal h264_idct_add8_10_%1,5,7,7
|
|||||||
ADD16_OP_INTRA %1, 18, 4+ 7*8
|
ADD16_OP_INTRA %1, 18, 4+ 7*8
|
||||||
add r2, 1024-128*2
|
add r2, 1024-128*2
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r0, [r10+gprsize]
|
mov r0, [r7+gprsize]
|
||||||
%else
|
%else
|
||||||
mov r0, r0m
|
mov r0, r0m
|
||||||
mov r0, [r0+gprsize]
|
mov r0, [r0+gprsize]
|
||||||
|
@ -289,7 +289,7 @@ cglobal pred16x16_tm_vp8_sse2, 2,6,6
|
|||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
|
||||||
%macro H264_PRED16x16_PLANE 3
|
%macro H264_PRED16x16_PLANE 3
|
||||||
cglobal pred16x16_plane_%3_%1, 2, 7, %2
|
cglobal pred16x16_plane_%3_%1, 2, 9, %2
|
||||||
mov r2, r1 ; +stride
|
mov r2, r1 ; +stride
|
||||||
neg r1 ; -stride
|
neg r1 ; -stride
|
||||||
|
|
||||||
@ -349,7 +349,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
|
|||||||
add r4, r2
|
add r4, r2
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%define e_reg r11
|
%define e_reg r8
|
||||||
%else
|
%else
|
||||||
%define e_reg r0
|
%define e_reg r0
|
||||||
%endif
|
%endif
|
||||||
@ -370,8 +370,8 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
|
|||||||
|
|
||||||
movzx e_reg, byte [r3 ]
|
movzx e_reg, byte [r3 ]
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
movzx r10, byte [r4+r2 ]
|
movzx r7, byte [r4+r2 ]
|
||||||
sub r10, e_reg
|
sub r7, e_reg
|
||||||
%else
|
%else
|
||||||
movzx r6, byte [r4+r2 ]
|
movzx r6, byte [r4+r2 ]
|
||||||
sub r6, e_reg
|
sub r6, e_reg
|
||||||
@ -386,7 +386,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
|
|||||||
movzx r6, byte [r3 ]
|
movzx r6, byte [r3 ]
|
||||||
sub r6, r4
|
sub r6, r4
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
lea r6, [r10+r6*2]
|
lea r6, [r7+r6*2]
|
||||||
lea r5, [r5+r6*2]
|
lea r5, [r5+r6*2]
|
||||||
add r5, r6
|
add r5, r6
|
||||||
%else
|
%else
|
||||||
@ -396,9 +396,9 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
|
|||||||
|
|
||||||
movzx r4, byte [e_reg ]
|
movzx r4, byte [e_reg ]
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
movzx r10, byte [r3 +r2 ]
|
movzx r7, byte [r3 +r2 ]
|
||||||
sub r10, r4
|
sub r7, r4
|
||||||
sub r5, r10
|
sub r5, r7
|
||||||
%else
|
%else
|
||||||
movzx r6, byte [r3 +r2 ]
|
movzx r6, byte [r3 +r2 ]
|
||||||
sub r6, r4
|
sub r6, r4
|
||||||
@ -410,7 +410,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
|
|||||||
movzx r6, byte [r3 +r2*2]
|
movzx r6, byte [r3 +r2*2]
|
||||||
sub r6, r4
|
sub r6, r4
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
add r6, r10
|
add r6, r7
|
||||||
%endif
|
%endif
|
||||||
lea r5, [r5+r6*8]
|
lea r5, [r5+r6*8]
|
||||||
|
|
||||||
@ -588,7 +588,7 @@ H264_PRED16x16_PLANE ssse3, 8, svq3
|
|||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
|
||||||
%macro H264_PRED8x8_PLANE 2
|
%macro H264_PRED8x8_PLANE 2
|
||||||
cglobal pred8x8_plane_%1, 2, 7, %2
|
cglobal pred8x8_plane_%1, 2, 9, %2
|
||||||
mov r2, r1 ; +stride
|
mov r2, r1 ; +stride
|
||||||
neg r1 ; -stride
|
neg r1 ; -stride
|
||||||
|
|
||||||
@ -642,7 +642,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2
|
|||||||
add r4, r2
|
add r4, r2
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%define e_reg r11
|
%define e_reg r8
|
||||||
%else
|
%else
|
||||||
%define e_reg r0
|
%define e_reg r0
|
||||||
%endif
|
%endif
|
||||||
@ -653,9 +653,9 @@ cglobal pred8x8_plane_%1, 2, 7, %2
|
|||||||
|
|
||||||
movzx e_reg, byte [r3 ]
|
movzx e_reg, byte [r3 ]
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
movzx r10, byte [r4+r2 ]
|
movzx r7, byte [r4+r2 ]
|
||||||
sub r10, e_reg
|
sub r7, e_reg
|
||||||
sub r5, r10
|
sub r5, r7
|
||||||
%else
|
%else
|
||||||
movzx r6, byte [r4+r2 ]
|
movzx r6, byte [r4+r2 ]
|
||||||
sub r6, e_reg
|
sub r6, e_reg
|
||||||
@ -667,7 +667,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2
|
|||||||
movzx r6, byte [r4+r2*2 ]
|
movzx r6, byte [r4+r2*2 ]
|
||||||
sub r6, e_reg
|
sub r6, e_reg
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
add r6, r10
|
add r6, r7
|
||||||
%endif
|
%endif
|
||||||
lea r5, [r5+r6*4]
|
lea r5, [r5+r6*4]
|
||||||
|
|
||||||
|
@ -121,8 +121,8 @@ MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro MCAxA_OP 8
|
%macro MCAxA_OP 8
|
||||||
cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8
|
|
||||||
%if ARCH_X86_32
|
%if ARCH_X86_32
|
||||||
|
cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8
|
||||||
call stub_%2_h264_qpel%4_%3_10_%1
|
call stub_%2_h264_qpel%4_%3_10_%1
|
||||||
mov r0, r0m
|
mov r0, r0m
|
||||||
mov r1, r1m
|
mov r1, r1m
|
||||||
@ -141,17 +141,19 @@ cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8
|
|||||||
call stub_%2_h264_qpel%4_%3_10_%1
|
call stub_%2_h264_qpel%4_%3_10_%1
|
||||||
RET
|
RET
|
||||||
%else ; ARCH_X86_64
|
%else ; ARCH_X86_64
|
||||||
mov r10, r0
|
cglobal %2_h264_qpel%5_%3_10_%1, %6,%7 + 2,%8
|
||||||
mov r11, r1
|
mov r%7, r0
|
||||||
|
%assign p1 %7+1
|
||||||
|
mov r %+ p1, r1
|
||||||
call stub_%2_h264_qpel%4_%3_10_%1
|
call stub_%2_h264_qpel%4_%3_10_%1
|
||||||
lea r0, [r10+%4*2]
|
lea r0, [r%7+%4*2]
|
||||||
lea r1, [r11+%4*2]
|
lea r1, [r %+ p1+%4*2]
|
||||||
call stub_%2_h264_qpel%4_%3_10_%1
|
call stub_%2_h264_qpel%4_%3_10_%1
|
||||||
lea r0, [r10+r2*%4]
|
lea r0, [r%7+r2*%4]
|
||||||
lea r1, [r11+r2*%4]
|
lea r1, [r %+ p1+r2*%4]
|
||||||
call stub_%2_h264_qpel%4_%3_10_%1
|
call stub_%2_h264_qpel%4_%3_10_%1
|
||||||
lea r0, [r10+r2*%4+%4*2]
|
lea r0, [r%7+r2*%4+%4*2]
|
||||||
lea r1, [r11+r2*%4+%4*2]
|
lea r1, [r %+ p1+r2*%4+%4*2]
|
||||||
%if UNIX64 == 0 ; fall through to function
|
%if UNIX64 == 0 ; fall through to function
|
||||||
call stub_%2_h264_qpel%4_%3_10_%1
|
call stub_%2_h264_qpel%4_%3_10_%1
|
||||||
RET
|
RET
|
||||||
|
@ -127,7 +127,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
|
|||||||
|
|
||||||
%macro BIWEIGHT_SETUP 0
|
%macro BIWEIGHT_SETUP 0
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%define off_regd r11d
|
%define off_regd r7d
|
||||||
%else
|
%else
|
||||||
%define off_regd r3d
|
%define off_regd r3d
|
||||||
%endif
|
%endif
|
||||||
@ -175,7 +175,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_MMX
|
INIT_MMX
|
||||||
cglobal h264_biweight_16_mmx2, 7, 7, 0
|
cglobal h264_biweight_16_mmx2, 7, 8, 0
|
||||||
BIWEIGHT_SETUP
|
BIWEIGHT_SETUP
|
||||||
movifnidn r3d, r3m
|
movifnidn r3d, r3m
|
||||||
.nextrow
|
.nextrow
|
||||||
@ -194,7 +194,7 @@ cglobal h264_biweight_16_mmx2, 7, 7, 0
|
|||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
%macro BIWEIGHT_FUNC_MM 3
|
%macro BIWEIGHT_FUNC_MM 3
|
||||||
cglobal h264_biweight_%1_%3, 7, 7, %2
|
cglobal h264_biweight_%1_%3, 7, 8, %2
|
||||||
BIWEIGHT_SETUP
|
BIWEIGHT_SETUP
|
||||||
movifnidn r3d, r3m
|
movifnidn r3d, r3m
|
||||||
.nextrow
|
.nextrow
|
||||||
@ -215,7 +215,7 @@ INIT_XMM
|
|||||||
BIWEIGHT_FUNC_MM 16, 8, sse2
|
BIWEIGHT_FUNC_MM 16, 8, sse2
|
||||||
|
|
||||||
%macro BIWEIGHT_FUNC_HALF_MM 3
|
%macro BIWEIGHT_FUNC_HALF_MM 3
|
||||||
cglobal h264_biweight_%1_%3, 7, 7, %2
|
cglobal h264_biweight_%1_%3, 7, 8, %2
|
||||||
BIWEIGHT_SETUP
|
BIWEIGHT_SETUP
|
||||||
movifnidn r3d, r3m
|
movifnidn r3d, r3m
|
||||||
sar r3, 1
|
sar r3, 1
|
||||||
@ -245,7 +245,7 @@ BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
|
|||||||
|
|
||||||
%macro BIWEIGHT_SSSE3_SETUP 0
|
%macro BIWEIGHT_SSSE3_SETUP 0
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%define off_regd r11d
|
%define off_regd r7d
|
||||||
%else
|
%else
|
||||||
%define off_regd r3d
|
%define off_regd r3d
|
||||||
%endif
|
%endif
|
||||||
@ -284,7 +284,7 @@ BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM
|
||||||
cglobal h264_biweight_16_ssse3, 7, 7, 8
|
cglobal h264_biweight_16_ssse3, 7, 8, 8
|
||||||
BIWEIGHT_SSSE3_SETUP
|
BIWEIGHT_SSSE3_SETUP
|
||||||
movifnidn r3d, r3m
|
movifnidn r3d, r3m
|
||||||
|
|
||||||
@ -303,7 +303,7 @@ cglobal h264_biweight_16_ssse3, 7, 7, 8
|
|||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM
|
||||||
cglobal h264_biweight_8_ssse3, 7, 7, 8
|
cglobal h264_biweight_8_ssse3, 7, 8, 8
|
||||||
BIWEIGHT_SSSE3_SETUP
|
BIWEIGHT_SSSE3_SETUP
|
||||||
movifnidn r3d, r3m
|
movifnidn r3d, r3m
|
||||||
sar r3, 1
|
sar r3, 1
|
||||||
|
@ -57,6 +57,18 @@ AVOutputFormat ff_adx_muxer = {
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if CONFIG_CAVSVIDEO_MUXER
|
||||||
|
AVOutputFormat ff_cavsvideo_muxer = {
|
||||||
|
.name = "cavsvideo",
|
||||||
|
.long_name = NULL_IF_CONFIG_SMALL("raw Chinese AVS video"),
|
||||||
|
.extensions = "cavs",
|
||||||
|
.audio_codec = CODEC_ID_NONE,
|
||||||
|
.video_codec = CODEC_ID_CAVS,
|
||||||
|
.write_packet = ff_raw_write_packet,
|
||||||
|
.flags = AVFMT_NOTIMESTAMPS,
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
#if CONFIG_DIRAC_MUXER
|
#if CONFIG_DIRAC_MUXER
|
||||||
AVOutputFormat ff_dirac_muxer = {
|
AVOutputFormat ff_dirac_muxer = {
|
||||||
.name = "dirac",
|
.name = "dirac",
|
||||||
@ -171,18 +183,6 @@ AVOutputFormat ff_h264_muxer = {
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if CONFIG_CAVSVIDEO_MUXER
|
|
||||||
AVOutputFormat ff_cavsvideo_muxer = {
|
|
||||||
.name = "cavsvideo",
|
|
||||||
.long_name = NULL_IF_CONFIG_SMALL("raw Chinese AVS video"),
|
|
||||||
.extensions = "cavs",
|
|
||||||
.audio_codec = CODEC_ID_NONE,
|
|
||||||
.video_codec = CODEC_ID_CAVS,
|
|
||||||
.write_packet = ff_raw_write_packet,
|
|
||||||
.flags = AVFMT_NOTIMESTAMPS,
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if CONFIG_M4V_MUXER
|
#if CONFIG_M4V_MUXER
|
||||||
AVOutputFormat ff_m4v_muxer = {
|
AVOutputFormat ff_m4v_muxer = {
|
||||||
.name = "m4v",
|
.name = "m4v",
|
||||||
@ -220,30 +220,6 @@ AVOutputFormat ff_mlp_muxer = {
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if CONFIG_SRT_MUXER
|
|
||||||
AVOutputFormat ff_srt_muxer = {
|
|
||||||
.name = "srt",
|
|
||||||
.long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle format"),
|
|
||||||
.mime_type = "application/x-subrip",
|
|
||||||
.extensions = "srt",
|
|
||||||
.write_packet = ff_raw_write_packet,
|
|
||||||
.flags = AVFMT_NOTIMESTAMPS,
|
|
||||||
.subtitle_codec = CODEC_ID_SRT,
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if CONFIG_TRUEHD_MUXER
|
|
||||||
AVOutputFormat ff_truehd_muxer = {
|
|
||||||
.name = "truehd",
|
|
||||||
.long_name = NULL_IF_CONFIG_SMALL("raw TrueHD"),
|
|
||||||
.extensions = "thd",
|
|
||||||
.audio_codec = CODEC_ID_TRUEHD,
|
|
||||||
.video_codec = CODEC_ID_NONE,
|
|
||||||
.write_packet = ff_raw_write_packet,
|
|
||||||
.flags = AVFMT_NOTIMESTAMPS,
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if CONFIG_MPEG1VIDEO_MUXER
|
#if CONFIG_MPEG1VIDEO_MUXER
|
||||||
AVOutputFormat ff_mpeg1video_muxer = {
|
AVOutputFormat ff_mpeg1video_muxer = {
|
||||||
.name = "mpeg1video",
|
.name = "mpeg1video",
|
||||||
@ -280,3 +256,27 @@ AVOutputFormat ff_rawvideo_muxer = {
|
|||||||
.flags = AVFMT_NOTIMESTAMPS,
|
.flags = AVFMT_NOTIMESTAMPS,
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if CONFIG_SRT_MUXER
|
||||||
|
AVOutputFormat ff_srt_muxer = {
|
||||||
|
.name = "srt",
|
||||||
|
.long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle format"),
|
||||||
|
.mime_type = "application/x-subrip",
|
||||||
|
.extensions = "srt",
|
||||||
|
.write_packet = ff_raw_write_packet,
|
||||||
|
.flags = AVFMT_NOTIMESTAMPS,
|
||||||
|
.subtitle_codec = CODEC_ID_SRT,
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CONFIG_TRUEHD_MUXER
|
||||||
|
AVOutputFormat ff_truehd_muxer = {
|
||||||
|
.name = "truehd",
|
||||||
|
.long_name = NULL_IF_CONFIG_SMALL("raw TrueHD"),
|
||||||
|
.extensions = "thd",
|
||||||
|
.audio_codec = CODEC_ID_TRUEHD,
|
||||||
|
.video_codec = CODEC_ID_NONE,
|
||||||
|
.write_packet = ff_raw_write_packet,
|
||||||
|
.flags = AVFMT_NOTIMESTAMPS,
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
;*****************************************************************************
|
;*****************************************************************************
|
||||||
;* x86inc.asm: x264asm abstraction layer
|
;* x86inc.asm: x264asm abstraction layer
|
||||||
;*****************************************************************************
|
;*****************************************************************************
|
||||||
;* Copyright (C) 2005-2011 x264 project
|
;* Copyright (C) 2005-2012 x264 project
|
||||||
;*
|
;*
|
||||||
;* Authors: Loren Merritt <lorenm@u.washington.edu>
|
;* Authors: Loren Merritt <lorenm@u.washington.edu>
|
||||||
;* Anton Mitrofanov <BugMaster@narod.ru>
|
;* Anton Mitrofanov <BugMaster@narod.ru>
|
||||||
;* Jason Garrett-Glaser <darkshikari@gmail.com>
|
;* Jason Garrett-Glaser <darkshikari@gmail.com>
|
||||||
|
;* Henrik Gramner <hengar-6@student.ltu.se>
|
||||||
;*
|
;*
|
||||||
;* Permission to use, copy, modify, and/or distribute this software for any
|
;* Permission to use, copy, modify, and/or distribute this software for any
|
||||||
;* purpose with or without fee is hereby granted, provided that the above
|
;* purpose with or without fee is hereby granted, provided that the above
|
||||||
@ -95,6 +96,9 @@
|
|||||||
default rel
|
default rel
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
|
||||||
|
CPU amdnop
|
||||||
|
|
||||||
; Macros to eliminate most code duplication between x86_32 and x86_64:
|
; Macros to eliminate most code duplication between x86_32 and x86_64:
|
||||||
; Currently this works only for leaf functions which load all their arguments
|
; Currently this works only for leaf functions which load all their arguments
|
||||||
; into registers at the start, and make no other use of the stack. Luckily that
|
; into registers at the start, and make no other use of the stack. Luckily that
|
||||||
@ -128,18 +132,20 @@
|
|||||||
; rNm is the original location of arg N (a register or on the stack), dword
|
; rNm is the original location of arg N (a register or on the stack), dword
|
||||||
; rNmp is native size
|
; rNmp is native size
|
||||||
|
|
||||||
%macro DECLARE_REG 6
|
%macro DECLARE_REG 5-6
|
||||||
%define r%1q %2
|
%define r%1q %2
|
||||||
%define r%1d %3
|
%define r%1d %3
|
||||||
%define r%1w %4
|
%define r%1w %4
|
||||||
%define r%1b %5
|
%define r%1b %5
|
||||||
%define r%1m %6
|
%if %0 == 5
|
||||||
%ifid %6 ; i.e. it's a register
|
%define r%1m %3
|
||||||
%define r%1mp %2
|
%define r%1mp %2
|
||||||
%elif ARCH_X86_64 ; memory
|
%elif ARCH_X86_64 ; memory
|
||||||
%define r%1mp qword %6
|
%define r%1m [rsp + stack_offset + %6]
|
||||||
|
%define r%1mp qword r %+ %1m
|
||||||
%else
|
%else
|
||||||
%define r%1mp dword %6
|
%define r%1m [esp + stack_offset + %6]
|
||||||
|
%define r%1mp dword r %+ %1m
|
||||||
%endif
|
%endif
|
||||||
%define r%1 %2
|
%define r%1 %2
|
||||||
%endmacro
|
%endmacro
|
||||||
@ -187,7 +193,7 @@ DECLARE_REG_SIZE bp, bpl
|
|||||||
%endrep
|
%endrep
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9
|
DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%define gprsize 8
|
%define gprsize 8
|
||||||
@ -205,6 +211,33 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9
|
|||||||
%assign stack_offset stack_offset-gprsize
|
%assign stack_offset stack_offset-gprsize
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
%macro PUSH_IF_USED 1-*
|
||||||
|
%rep %0
|
||||||
|
%if %1 < regs_used
|
||||||
|
PUSH r%1
|
||||||
|
%endif
|
||||||
|
%rotate 1
|
||||||
|
%endrep
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro POP_IF_USED 1-*
|
||||||
|
%rep %0
|
||||||
|
%if %1 < regs_used
|
||||||
|
pop r%1
|
||||||
|
%endif
|
||||||
|
%rotate 1
|
||||||
|
%endrep
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro LOAD_IF_USED 1-*
|
||||||
|
%rep %0
|
||||||
|
%if %1 < num_args
|
||||||
|
mov r%1, r %+ %1 %+ mp
|
||||||
|
%endif
|
||||||
|
%rotate 1
|
||||||
|
%endrep
|
||||||
|
%endmacro
|
||||||
|
|
||||||
%macro SUB 2
|
%macro SUB 2
|
||||||
sub %1, %2
|
sub %1, %2
|
||||||
%ifidn %1, rsp
|
%ifidn %1, rsp
|
||||||
@ -272,39 +305,34 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9
|
|||||||
|
|
||||||
%if WIN64 ; Windows x64 ;=================================================
|
%if WIN64 ; Windows x64 ;=================================================
|
||||||
|
|
||||||
DECLARE_REG 0, rcx, ecx, cx, cl, ecx
|
DECLARE_REG 0, rcx, ecx, cx, cl
|
||||||
DECLARE_REG 1, rdx, edx, dx, dl, edx
|
DECLARE_REG 1, rdx, edx, dx, dl
|
||||||
DECLARE_REG 2, r8, r8d, r8w, r8b, r8d
|
DECLARE_REG 2, R8, R8D, R8W, R8B
|
||||||
DECLARE_REG 3, r9, r9d, r9w, r9b, r9d
|
DECLARE_REG 3, R9, R9D, R9W, R9B
|
||||||
DECLARE_REG 4, rdi, edi, di, dil, [rsp + stack_offset + 40]
|
DECLARE_REG 4, R10, R10D, R10W, R10B, 40
|
||||||
DECLARE_REG 5, rsi, esi, si, sil, [rsp + stack_offset + 48]
|
DECLARE_REG 5, R11, R11D, R11W, R11B, 48
|
||||||
DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
|
DECLARE_REG 6, rax, eax, ax, al, 56
|
||||||
%define r7m [rsp + stack_offset + 64]
|
DECLARE_REG 7, rdi, edi, di, dil, 64
|
||||||
%define r8m [rsp + stack_offset + 72]
|
DECLARE_REG 8, rsi, esi, si, sil, 72
|
||||||
|
DECLARE_REG 9, rbx, ebx, bx, bl, 80
|
||||||
%macro LOAD_IF_USED 2 ; reg_id, number_of_args
|
DECLARE_REG 10, rbp, ebp, bp, bpl, 88
|
||||||
%if %1 < %2
|
DECLARE_REG 11, R12, R12D, R12W, R12B, 96
|
||||||
mov r%1, [rsp + stack_offset + 8 + %1*8]
|
DECLARE_REG 12, R13, R13D, R13W, R13B, 104
|
||||||
%endif
|
DECLARE_REG 13, R14, R14D, R14W, R14B, 112
|
||||||
%endmacro
|
DECLARE_REG 14, R15, R15D, R15W, R15B, 120
|
||||||
|
|
||||||
%macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
|
%macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
|
||||||
ASSERT %2 >= %1
|
%assign num_args %1
|
||||||
%assign regs_used %2
|
%assign regs_used %2
|
||||||
ASSERT regs_used <= 7
|
ASSERT regs_used >= num_args
|
||||||
%if regs_used > 4
|
ASSERT regs_used <= 15
|
||||||
push r4
|
PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
|
||||||
push r5
|
|
||||||
%assign stack_offset stack_offset+16
|
|
||||||
%endif
|
|
||||||
%if mmsize == 8
|
%if mmsize == 8
|
||||||
%assign xmm_regs_used 0
|
%assign xmm_regs_used 0
|
||||||
%else
|
%else
|
||||||
WIN64_SPILL_XMM %3
|
WIN64_SPILL_XMM %3
|
||||||
%endif
|
%endif
|
||||||
LOAD_IF_USED 4, %1
|
LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
|
||||||
LOAD_IF_USED 5, %1
|
|
||||||
LOAD_IF_USED 6, %1
|
|
||||||
DEFINE_ARGS %4
|
DEFINE_ARGS %4
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
@ -312,12 +340,11 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
|
|||||||
%assign xmm_regs_used %1
|
%assign xmm_regs_used %1
|
||||||
ASSERT xmm_regs_used <= 16
|
ASSERT xmm_regs_used <= 16
|
||||||
%if xmm_regs_used > 6
|
%if xmm_regs_used > 6
|
||||||
sub rsp, (xmm_regs_used-6)*16+16
|
SUB rsp, (xmm_regs_used-6)*16+16
|
||||||
%assign stack_offset stack_offset+(xmm_regs_used-6)*16+16
|
|
||||||
%assign %%i xmm_regs_used
|
%assign %%i xmm_regs_used
|
||||||
%rep (xmm_regs_used-6)
|
%rep (xmm_regs_used-6)
|
||||||
%assign %%i %%i-1
|
%assign %%i %%i-1
|
||||||
movdqa [rsp + (%%i-6)*16+8], xmm %+ %%i
|
movdqa [rsp + (%%i-6)*16+(~stack_offset&8)], xmm %+ %%i
|
||||||
%endrep
|
%endrep
|
||||||
%endif
|
%endif
|
||||||
%endmacro
|
%endmacro
|
||||||
@ -327,7 +354,7 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
|
|||||||
%assign %%i xmm_regs_used
|
%assign %%i xmm_regs_used
|
||||||
%rep (xmm_regs_used-6)
|
%rep (xmm_regs_used-6)
|
||||||
%assign %%i %%i-1
|
%assign %%i %%i-1
|
||||||
movdqa xmm %+ %%i, [%1 + (%%i-6)*16+8]
|
movdqa xmm %+ %%i, [%1 + (%%i-6)*16+(~stack_offset&8)]
|
||||||
%endrep
|
%endrep
|
||||||
add %1, (xmm_regs_used-6)*16+16
|
add %1, (xmm_regs_used-6)*16+16
|
||||||
%endif
|
%endif
|
||||||
@ -341,15 +368,12 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
|
|||||||
|
|
||||||
%macro RET 0
|
%macro RET 0
|
||||||
WIN64_RESTORE_XMM_INTERNAL rsp
|
WIN64_RESTORE_XMM_INTERNAL rsp
|
||||||
%if regs_used > 4
|
POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
|
||||||
pop r5
|
|
||||||
pop r4
|
|
||||||
%endif
|
|
||||||
ret
|
ret
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro REP_RET 0
|
%macro REP_RET 0
|
||||||
%if regs_used > 4 || xmm_regs_used > 6
|
%if regs_used > 7 || xmm_regs_used > 6
|
||||||
RET
|
RET
|
||||||
%else
|
%else
|
||||||
rep ret
|
rep ret
|
||||||
@ -358,92 +382,80 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
|
|||||||
|
|
||||||
%elif ARCH_X86_64 ; *nix x64 ;=============================================
|
%elif ARCH_X86_64 ; *nix x64 ;=============================================
|
||||||
|
|
||||||
DECLARE_REG 0, rdi, edi, di, dil, edi
|
DECLARE_REG 0, rdi, edi, di, dil
|
||||||
DECLARE_REG 1, rsi, esi, si, sil, esi
|
DECLARE_REG 1, rsi, esi, si, sil
|
||||||
DECLARE_REG 2, rdx, edx, dx, dl, edx
|
DECLARE_REG 2, rdx, edx, dx, dl
|
||||||
DECLARE_REG 3, rcx, ecx, cx, cl, ecx
|
DECLARE_REG 3, rcx, ecx, cx, cl
|
||||||
DECLARE_REG 4, r8, r8d, r8w, r8b, r8d
|
DECLARE_REG 4, R8, R8D, R8W, R8B
|
||||||
DECLARE_REG 5, r9, r9d, r9w, r9b, r9d
|
DECLARE_REG 5, R9, R9D, R9W, R9B
|
||||||
DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 8]
|
DECLARE_REG 6, rax, eax, ax, al, 8
|
||||||
%define r7m [rsp + stack_offset + 16]
|
DECLARE_REG 7, R10, R10D, R10W, R10B, 16
|
||||||
%define r8m [rsp + stack_offset + 24]
|
DECLARE_REG 8, R11, R11D, R11W, R11B, 24
|
||||||
|
DECLARE_REG 9, rbx, ebx, bx, bl, 32
|
||||||
%macro LOAD_IF_USED 2 ; reg_id, number_of_args
|
DECLARE_REG 10, rbp, ebp, bp, bpl, 40
|
||||||
%if %1 < %2
|
DECLARE_REG 11, R12, R12D, R12W, R12B, 48
|
||||||
mov r%1, [rsp - 40 + %1*8]
|
DECLARE_REG 12, R13, R13D, R13W, R13B, 56
|
||||||
%endif
|
DECLARE_REG 13, R14, R14D, R14W, R14B, 64
|
||||||
%endmacro
|
DECLARE_REG 14, R15, R15D, R15W, R15B, 72
|
||||||
|
|
||||||
%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
|
%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
|
||||||
ASSERT %2 >= %1
|
%assign num_args %1
|
||||||
ASSERT %2 <= 7
|
%assign regs_used %2
|
||||||
LOAD_IF_USED 6, %1
|
ASSERT regs_used >= num_args
|
||||||
|
ASSERT regs_used <= 15
|
||||||
|
PUSH_IF_USED 9, 10, 11, 12, 13, 14
|
||||||
|
LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
|
||||||
DEFINE_ARGS %4
|
DEFINE_ARGS %4
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro RET 0
|
%macro RET 0
|
||||||
|
POP_IF_USED 14, 13, 12, 11, 10, 9
|
||||||
ret
|
ret
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro REP_RET 0
|
%macro REP_RET 0
|
||||||
|
%if regs_used > 9
|
||||||
|
RET
|
||||||
|
%else
|
||||||
rep ret
|
rep ret
|
||||||
|
%endif
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%else ; X86_32 ;==============================================================
|
%else ; X86_32 ;==============================================================
|
||||||
|
|
||||||
DECLARE_REG 0, eax, eax, ax, al, [esp + stack_offset + 4]
|
DECLARE_REG 0, eax, eax, ax, al, 4
|
||||||
DECLARE_REG 1, ecx, ecx, cx, cl, [esp + stack_offset + 8]
|
DECLARE_REG 1, ecx, ecx, cx, cl, 8
|
||||||
DECLARE_REG 2, edx, edx, dx, dl, [esp + stack_offset + 12]
|
DECLARE_REG 2, edx, edx, dx, dl, 12
|
||||||
DECLARE_REG 3, ebx, ebx, bx, bl, [esp + stack_offset + 16]
|
DECLARE_REG 3, ebx, ebx, bx, bl, 16
|
||||||
DECLARE_REG 4, esi, esi, si, null, [esp + stack_offset + 20]
|
DECLARE_REG 4, esi, esi, si, null, 20
|
||||||
DECLARE_REG 5, edi, edi, di, null, [esp + stack_offset + 24]
|
DECLARE_REG 5, edi, edi, di, null, 24
|
||||||
DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
|
DECLARE_REG 6, ebp, ebp, bp, null, 28
|
||||||
%define r7m [esp + stack_offset + 32]
|
|
||||||
%define r8m [esp + stack_offset + 36]
|
|
||||||
%define rsp esp
|
%define rsp esp
|
||||||
|
|
||||||
%macro PUSH_IF_USED 1 ; reg_id
|
%macro DECLARE_ARG 1-*
|
||||||
%if %1 < regs_used
|
%rep %0
|
||||||
push r%1
|
%define r%1m [esp + stack_offset + 4*%1 + 4]
|
||||||
%assign stack_offset stack_offset+4
|
%define r%1mp dword r%1m
|
||||||
%endif
|
%rotate 1
|
||||||
|
%endrep
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro POP_IF_USED 1 ; reg_id
|
DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
|
||||||
%if %1 < regs_used
|
|
||||||
pop r%1
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro LOAD_IF_USED 2 ; reg_id, number_of_args
|
|
||||||
%if %1 < %2
|
|
||||||
mov r%1, [esp + stack_offset + 4 + %1*4]
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
|
%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
|
||||||
ASSERT %2 >= %1
|
%assign num_args %1
|
||||||
%assign regs_used %2
|
%assign regs_used %2
|
||||||
ASSERT regs_used <= 7
|
%if regs_used > 7
|
||||||
PUSH_IF_USED 3
|
%assign regs_used 7
|
||||||
PUSH_IF_USED 4
|
%endif
|
||||||
PUSH_IF_USED 5
|
ASSERT regs_used >= num_args
|
||||||
PUSH_IF_USED 6
|
PUSH_IF_USED 3, 4, 5, 6
|
||||||
LOAD_IF_USED 0, %1
|
LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
|
||||||
LOAD_IF_USED 1, %1
|
|
||||||
LOAD_IF_USED 2, %1
|
|
||||||
LOAD_IF_USED 3, %1
|
|
||||||
LOAD_IF_USED 4, %1
|
|
||||||
LOAD_IF_USED 5, %1
|
|
||||||
LOAD_IF_USED 6, %1
|
|
||||||
DEFINE_ARGS %4
|
DEFINE_ARGS %4
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro RET 0
|
%macro RET 0
|
||||||
POP_IF_USED 6
|
POP_IF_USED 6, 5, 4, 3
|
||||||
POP_IF_USED 5
|
|
||||||
POP_IF_USED 4
|
|
||||||
POP_IF_USED 3
|
|
||||||
ret
|
ret
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
@ -464,8 +476,6 @@ DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
|
|||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;=============================================================================
|
;=============================================================================
|
||||||
; arch-independent part
|
; arch-independent part
|
||||||
;=============================================================================
|
;=============================================================================
|
||||||
|
@ -17,14 +17,14 @@ OBJS = input.o \
|
|||||||
OBJS-$(ARCH_BFIN) += bfin/internal_bfin.o \
|
OBJS-$(ARCH_BFIN) += bfin/internal_bfin.o \
|
||||||
bfin/swscale_bfin.o \
|
bfin/swscale_bfin.o \
|
||||||
bfin/yuv2rgb_bfin.o
|
bfin/yuv2rgb_bfin.o
|
||||||
OBJS-$(HAVE_ALTIVEC) += ppc/swscale_altivec.o \
|
ALTIVEC-OBJS += ppc/swscale_altivec.o \
|
||||||
ppc/yuv2rgb_altivec.o \
|
ppc/yuv2rgb_altivec.o \
|
||||||
ppc/yuv2yuv_altivec.o
|
ppc/yuv2yuv_altivec.o
|
||||||
OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \
|
MMX-OBJS += x86/rgb2rgb.o \
|
||||||
x86/swscale_mmx.o \
|
x86/swscale_mmx.o \
|
||||||
x86/yuv2rgb_mmx.o
|
x86/yuv2rgb_mmx.o
|
||||||
OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o
|
VIS-OBJS += sparc/yuv2rgb_vis.o
|
||||||
MMX-OBJS-$(HAVE_YASM) += x86/input.o \
|
YASM-OBJS += x86/input.o \
|
||||||
x86/output.o \
|
x86/output.o \
|
||||||
x86/scale.o
|
x86/scale.o
|
||||||
|
|
||||||
|
@ -62,11 +62,11 @@ SECTION .text
|
|||||||
%define cntr_reg fltsizeq
|
%define cntr_reg fltsizeq
|
||||||
%define movsx mov
|
%define movsx mov
|
||||||
%else
|
%else
|
||||||
%define cntr_reg r11
|
%define cntr_reg r7
|
||||||
%define movsx movsxd
|
%define movsx movsxd
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
cglobal yuv2planeX_%1, %3, 7, %2, filter, fltsize, src, dst, w, dither, offset
|
cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
|
||||||
%if %1 == 8 || %1 == 9 || %1 == 10
|
%if %1 == 8 || %1 == 9 || %1 == 10
|
||||||
pxor m6, m6
|
pxor m6, m6
|
||||||
%endif ; %1 == 8/9/10
|
%endif ; %1 == 8/9/10
|
||||||
|
@ -53,7 +53,7 @@ SECTION .text
|
|||||||
%ifnidn %3, X
|
%ifnidn %3, X
|
||||||
cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, src, filter, fltpos, pos1
|
cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, src, filter, fltpos, pos1
|
||||||
%else
|
%else
|
||||||
cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, srcmem, filter, fltpos, fltsize
|
cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsize
|
||||||
%endif
|
%endif
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
movsxd wq, wd
|
movsxd wq, wd
|
||||||
@ -245,10 +245,9 @@ cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, srcmem, filter, fltpos, fltsiz
|
|||||||
%define dlt 0
|
%define dlt 0
|
||||||
%endif ; %4 ==/!= X4
|
%endif ; %4 ==/!= X4
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
push r12
|
%define srcq r8
|
||||||
%define srcq r11
|
%define pos1q r7
|
||||||
%define pos1q r10
|
%define srcendq r9
|
||||||
%define srcendq r12
|
|
||||||
movsxd fltsizeq, fltsized ; filterSize
|
movsxd fltsizeq, fltsized ; filterSize
|
||||||
lea srcendq, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4]
|
lea srcendq, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4]
|
||||||
%else ; x86-32
|
%else ; x86-32
|
||||||
@ -388,16 +387,7 @@ cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, srcmem, filter, fltpos, fltsiz
|
|||||||
add wq, 2
|
add wq, 2
|
||||||
%endif ; %3 ==/!= X
|
%endif ; %3 ==/!= X
|
||||||
jl .loop
|
jl .loop
|
||||||
%ifnidn %3, X
|
|
||||||
REP_RET
|
REP_RET
|
||||||
%else ; %3 == X
|
|
||||||
%if ARCH_X86_64
|
|
||||||
pop r12
|
|
||||||
RET
|
|
||||||
%else ; x86-32
|
|
||||||
REP_RET
|
|
||||||
%endif ; x86-32/64
|
|
||||||
%endif ; %3 ==/!= X
|
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
; SCALE_FUNCS source_width, intermediate_nbits, n_xmm
|
; SCALE_FUNCS source_width, intermediate_nbits, n_xmm
|
||||||
|
Loading…
x
Reference in New Issue
Block a user