Merge commit 'c166148409fe8f0dbccef2fe684286a40ba1e37d'
* commit 'c166148409fe8f0dbccef2fe684286a40ba1e37d': dsputil: Move pix_sum, pix_norm1, shrink function pointers to mpegvideoenc Conflicts: libavcodec/dsputil.c libavcodec/mpegvideo_enc.c libavcodec/x86/dsputilenc.asm libavcodec/x86/dsputilenc_mmx.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
020865f557
@ -22,6 +22,7 @@ OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_arm.o \
|
|||||||
OBJS-$(CONFIG_LLAUDDSP) += arm/lossless_audiodsp_init_arm.o
|
OBJS-$(CONFIG_LLAUDDSP) += arm/lossless_audiodsp_init_arm.o
|
||||||
OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o
|
OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o
|
||||||
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
|
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
|
||||||
|
OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o
|
||||||
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
|
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
|
||||||
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o
|
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o
|
||||||
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o
|
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o
|
||||||
@ -61,6 +62,7 @@ ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \
|
|||||||
arm/idctdsp_armv6.o \
|
arm/idctdsp_armv6.o \
|
||||||
arm/simple_idct_armv6.o
|
arm/simple_idct_armv6.o
|
||||||
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
|
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
|
||||||
|
ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o
|
||||||
|
|
||||||
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o
|
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o
|
||||||
ARMV6-OBJS-$(CONFIG_VC1_DECODER) += arm/startcode_armv6.o
|
ARMV6-OBJS-$(CONFIG_VC1_DECODER) += arm/startcode_armv6.o
|
||||||
|
@ -297,58 +297,3 @@ function ff_sse16_armv6, export=1
|
|||||||
|
|
||||||
pop {r4-r9, pc}
|
pop {r4-r9, pc}
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
function ff_pix_norm1_armv6, export=1
|
|
||||||
push {r4-r6, lr}
|
|
||||||
mov r12, #16
|
|
||||||
mov lr, #0
|
|
||||||
1:
|
|
||||||
ldm r0, {r2-r5}
|
|
||||||
uxtb16 r6, r2
|
|
||||||
uxtb16 r2, r2, ror #8
|
|
||||||
smlad lr, r6, r6, lr
|
|
||||||
uxtb16 r6, r3
|
|
||||||
smlad lr, r2, r2, lr
|
|
||||||
uxtb16 r3, r3, ror #8
|
|
||||||
smlad lr, r6, r6, lr
|
|
||||||
uxtb16 r6, r4
|
|
||||||
smlad lr, r3, r3, lr
|
|
||||||
uxtb16 r4, r4, ror #8
|
|
||||||
smlad lr, r6, r6, lr
|
|
||||||
uxtb16 r6, r5
|
|
||||||
smlad lr, r4, r4, lr
|
|
||||||
uxtb16 r5, r5, ror #8
|
|
||||||
smlad lr, r6, r6, lr
|
|
||||||
subs r12, r12, #1
|
|
||||||
add r0, r0, r1
|
|
||||||
smlad lr, r5, r5, lr
|
|
||||||
bgt 1b
|
|
||||||
|
|
||||||
mov r0, lr
|
|
||||||
pop {r4-r6, pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_pix_sum_armv6, export=1
|
|
||||||
push {r4-r7, lr}
|
|
||||||
mov r12, #16
|
|
||||||
mov r2, #0
|
|
||||||
mov r3, #0
|
|
||||||
mov lr, #0
|
|
||||||
ldr r4, [r0]
|
|
||||||
1:
|
|
||||||
subs r12, r12, #1
|
|
||||||
ldr r5, [r0, #4]
|
|
||||||
usada8 r2, r4, lr, r2
|
|
||||||
ldr r6, [r0, #8]
|
|
||||||
usada8 r3, r5, lr, r3
|
|
||||||
ldr r7, [r0, #12]
|
|
||||||
usada8 r2, r6, lr, r2
|
|
||||||
beq 2f
|
|
||||||
ldr_pre r4, r0, r1
|
|
||||||
usada8 r3, r7, lr, r3
|
|
||||||
bgt 1b
|
|
||||||
2:
|
|
||||||
usada8 r3, r7, lr, r3
|
|
||||||
add r0, r2, r3
|
|
||||||
pop {r4-r7, pc}
|
|
||||||
endfunc
|
|
||||||
|
@ -43,9 +43,6 @@ int ff_pix_abs8_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
|
|||||||
int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
|
int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
|
||||||
int line_size, int h);
|
int line_size, int h);
|
||||||
|
|
||||||
int ff_pix_norm1_armv6(uint8_t *pix, int line_size);
|
|
||||||
int ff_pix_sum_armv6(uint8_t *pix, int line_size);
|
|
||||||
|
|
||||||
av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
|
av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
|
||||||
unsigned high_bit_depth)
|
unsigned high_bit_depth)
|
||||||
{
|
{
|
||||||
@ -63,7 +60,4 @@ av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
|
|||||||
c->sad[1] = ff_pix_abs8_armv6;
|
c->sad[1] = ff_pix_abs8_armv6;
|
||||||
|
|
||||||
c->sse[0] = ff_sse16_armv6;
|
c->sse[0] = ff_sse16_armv6;
|
||||||
|
|
||||||
c->pix_norm1 = ff_pix_norm1_armv6;
|
|
||||||
c->pix_sum = ff_pix_sum_armv6;
|
|
||||||
}
|
}
|
||||||
|
76
libavcodec/arm/mpegvideoencdsp_armv6.S
Normal file
76
libavcodec/arm/mpegvideoencdsp_armv6.S
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/arm/asm.S"
|
||||||
|
|
||||||
|
function ff_pix_norm1_armv6, export=1
|
||||||
|
push {r4-r6, lr}
|
||||||
|
mov r12, #16
|
||||||
|
mov lr, #0
|
||||||
|
1:
|
||||||
|
ldm r0, {r2-r5}
|
||||||
|
uxtb16 r6, r2
|
||||||
|
uxtb16 r2, r2, ror #8
|
||||||
|
smlad lr, r6, r6, lr
|
||||||
|
uxtb16 r6, r3
|
||||||
|
smlad lr, r2, r2, lr
|
||||||
|
uxtb16 r3, r3, ror #8
|
||||||
|
smlad lr, r6, r6, lr
|
||||||
|
uxtb16 r6, r4
|
||||||
|
smlad lr, r3, r3, lr
|
||||||
|
uxtb16 r4, r4, ror #8
|
||||||
|
smlad lr, r6, r6, lr
|
||||||
|
uxtb16 r6, r5
|
||||||
|
smlad lr, r4, r4, lr
|
||||||
|
uxtb16 r5, r5, ror #8
|
||||||
|
smlad lr, r6, r6, lr
|
||||||
|
subs r12, r12, #1
|
||||||
|
add r0, r0, r1
|
||||||
|
smlad lr, r5, r5, lr
|
||||||
|
bgt 1b
|
||||||
|
|
||||||
|
mov r0, lr
|
||||||
|
pop {r4-r6, pc}
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
function ff_pix_sum_armv6, export=1
|
||||||
|
push {r4-r7, lr}
|
||||||
|
mov r12, #16
|
||||||
|
mov r2, #0
|
||||||
|
mov r3, #0
|
||||||
|
mov lr, #0
|
||||||
|
ldr r4, [r0]
|
||||||
|
1:
|
||||||
|
subs r12, r12, #1
|
||||||
|
ldr r5, [r0, #4]
|
||||||
|
usada8 r2, r4, lr, r2
|
||||||
|
ldr r6, [r0, #8]
|
||||||
|
usada8 r3, r5, lr, r3
|
||||||
|
ldr r7, [r0, #12]
|
||||||
|
usada8 r2, r6, lr, r2
|
||||||
|
beq 2f
|
||||||
|
ldr_pre r4, r0, r1
|
||||||
|
usada8 r3, r7, lr, r3
|
||||||
|
bgt 1b
|
||||||
|
2:
|
||||||
|
usada8 r3, r7, lr, r3
|
||||||
|
add r0, r2, r3
|
||||||
|
pop {r4-r7, pc}
|
||||||
|
endfunc
|
38
libavcodec/arm/mpegvideoencdsp_init_arm.c
Normal file
38
libavcodec/arm/mpegvideoencdsp_init_arm.c
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/arm/cpu.h"
|
||||||
|
#include "libavcodec/avcodec.h"
|
||||||
|
#include "libavcodec/mpegvideoencdsp.h"
|
||||||
|
|
||||||
|
int ff_pix_norm1_armv6(uint8_t *pix, int line_size);
|
||||||
|
int ff_pix_sum_armv6(uint8_t *pix, int line_size);
|
||||||
|
|
||||||
|
av_cold void ff_mpegvideoencdsp_init_arm(MpegvideoEncDSPContext *c,
|
||||||
|
AVCodecContext *avctx)
|
||||||
|
{
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
if (have_armv6(cpu_flags)) {
|
||||||
|
c->pix_norm1 = ff_pix_norm1_armv6;
|
||||||
|
c->pix_sum = ff_pix_sum_armv6;
|
||||||
|
}
|
||||||
|
}
|
@ -323,6 +323,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
|
|||||||
|
|
||||||
ff_blockdsp_init(&ctx->bdsp, avctx);
|
ff_blockdsp_init(&ctx->bdsp, avctx);
|
||||||
ff_idctdsp_init(&ctx->m.idsp, avctx);
|
ff_idctdsp_init(&ctx->m.idsp, avctx);
|
||||||
|
ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
|
||||||
ff_dct_common_init(&ctx->m);
|
ff_dct_common_init(&ctx->m);
|
||||||
ff_dct_encode_init(&ctx->m);
|
ff_dct_encode_init(&ctx->m);
|
||||||
|
|
||||||
@ -733,8 +734,8 @@ static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg,
|
|||||||
int varc;
|
int varc;
|
||||||
|
|
||||||
if (!partial_last_row && mb_x * 16 <= avctx->width - 16) {
|
if (!partial_last_row && mb_x * 16 <= avctx->width - 16) {
|
||||||
sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
|
sum = ctx->m.mpvencdsp.pix_sum(pix, ctx->m.linesize);
|
||||||
varc = ctx->m.dsp.pix_norm1(pix, ctx->m.linesize);
|
varc = ctx->m.mpvencdsp.pix_norm1(pix, ctx->m.linesize);
|
||||||
} else {
|
} else {
|
||||||
int bw = FFMIN(avctx->width - 16 * mb_x, 16);
|
int bw = FFMIN(avctx->width - 16 * mb_x, 16);
|
||||||
int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
|
int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
|
||||||
|
@ -26,7 +26,6 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "libavutil/attributes.h"
|
#include "libavutil/attributes.h"
|
||||||
#include "libavutil/imgutils.h"
|
|
||||||
#include "libavutil/internal.h"
|
#include "libavutil/internal.h"
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "copy_block.h"
|
#include "copy_block.h"
|
||||||
@ -34,8 +33,6 @@
|
|||||||
#include "dsputil.h"
|
#include "dsputil.h"
|
||||||
#include "simple_idct.h"
|
#include "simple_idct.h"
|
||||||
#include "faandct.h"
|
#include "faandct.h"
|
||||||
#include "imgconvert.h"
|
|
||||||
#include "mathops.h"
|
|
||||||
#include "mpegvideo.h"
|
#include "mpegvideo.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
@ -48,74 +45,6 @@ uint32_t ff_square_tab[512] = { 0, };
|
|||||||
#define BIT_DEPTH 8
|
#define BIT_DEPTH 8
|
||||||
#include "dsputilenc_template.c"
|
#include "dsputilenc_template.c"
|
||||||
|
|
||||||
static int pix_sum_c(uint8_t *pix, int line_size)
|
|
||||||
{
|
|
||||||
int s = 0, i, j;
|
|
||||||
|
|
||||||
for (i = 0; i < 16; i++) {
|
|
||||||
for (j = 0; j < 16; j += 8) {
|
|
||||||
s += pix[0];
|
|
||||||
s += pix[1];
|
|
||||||
s += pix[2];
|
|
||||||
s += pix[3];
|
|
||||||
s += pix[4];
|
|
||||||
s += pix[5];
|
|
||||||
s += pix[6];
|
|
||||||
s += pix[7];
|
|
||||||
pix += 8;
|
|
||||||
}
|
|
||||||
pix += line_size - 16;
|
|
||||||
}
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int pix_norm1_c(uint8_t *pix, int line_size)
|
|
||||||
{
|
|
||||||
int s = 0, i, j;
|
|
||||||
uint32_t *sq = ff_square_tab + 256;
|
|
||||||
|
|
||||||
for (i = 0; i < 16; i++) {
|
|
||||||
for (j = 0; j < 16; j += 8) {
|
|
||||||
#if 0
|
|
||||||
s += sq[pix[0]];
|
|
||||||
s += sq[pix[1]];
|
|
||||||
s += sq[pix[2]];
|
|
||||||
s += sq[pix[3]];
|
|
||||||
s += sq[pix[4]];
|
|
||||||
s += sq[pix[5]];
|
|
||||||
s += sq[pix[6]];
|
|
||||||
s += sq[pix[7]];
|
|
||||||
#else
|
|
||||||
#if HAVE_FAST_64BIT
|
|
||||||
register uint64_t x = *(uint64_t *) pix;
|
|
||||||
s += sq[x & 0xff];
|
|
||||||
s += sq[(x >> 8) & 0xff];
|
|
||||||
s += sq[(x >> 16) & 0xff];
|
|
||||||
s += sq[(x >> 24) & 0xff];
|
|
||||||
s += sq[(x >> 32) & 0xff];
|
|
||||||
s += sq[(x >> 40) & 0xff];
|
|
||||||
s += sq[(x >> 48) & 0xff];
|
|
||||||
s += sq[(x >> 56) & 0xff];
|
|
||||||
#else
|
|
||||||
register uint32_t x = *(uint32_t *) pix;
|
|
||||||
s += sq[x & 0xff];
|
|
||||||
s += sq[(x >> 8) & 0xff];
|
|
||||||
s += sq[(x >> 16) & 0xff];
|
|
||||||
s += sq[(x >> 24) & 0xff];
|
|
||||||
x = *(uint32_t *) (pix + 4);
|
|
||||||
s += sq[x & 0xff];
|
|
||||||
s += sq[(x >> 8) & 0xff];
|
|
||||||
s += sq[(x >> 16) & 0xff];
|
|
||||||
s += sq[(x >> 24) & 0xff];
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
pix += 8;
|
|
||||||
}
|
|
||||||
pix += line_size - 16;
|
|
||||||
}
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
||||||
int line_size, int h)
|
int line_size, int h)
|
||||||
{
|
{
|
||||||
@ -1094,9 +1023,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
|
|||||||
|
|
||||||
c->sum_abs_dctelem = sum_abs_dctelem_c;
|
c->sum_abs_dctelem = sum_abs_dctelem_c;
|
||||||
|
|
||||||
c->pix_sum = pix_sum_c;
|
|
||||||
c->pix_norm1 = pix_norm1_c;
|
|
||||||
|
|
||||||
/* TODO [0] 16 [1] 8 */
|
/* TODO [0] 16 [1] 8 */
|
||||||
c->pix_abs[0][0] = pix_abs16_c;
|
c->pix_abs[0][0] = pix_abs16_c;
|
||||||
c->pix_abs[0][1] = pix_abs16_x2_c;
|
c->pix_abs[0][1] = pix_abs16_x2_c;
|
||||||
@ -1141,11 +1067,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
|
|||||||
ff_dsputil_init_dwt(c);
|
ff_dsputil_init_dwt(c);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
c->shrink[0] = av_image_copy_plane;
|
|
||||||
c->shrink[1] = ff_shrink22;
|
|
||||||
c->shrink[2] = ff_shrink44;
|
|
||||||
c->shrink[3] = ff_shrink88;
|
|
||||||
|
|
||||||
c->draw_edges = draw_edges_8_c;
|
c->draw_edges = draw_edges_8_c;
|
||||||
|
|
||||||
switch (avctx->bits_per_raw_sample) {
|
switch (avctx->bits_per_raw_sample) {
|
||||||
|
@ -72,9 +72,6 @@ typedef struct DSPContext {
|
|||||||
int stride);
|
int stride);
|
||||||
int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
|
int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
|
||||||
|
|
||||||
int (*pix_sum)(uint8_t *pix, int line_size);
|
|
||||||
int (*pix_norm1)(uint8_t *pix, int line_size);
|
|
||||||
|
|
||||||
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
|
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
|
||||||
me_cmp_func sse[6];
|
me_cmp_func sse[6];
|
||||||
me_cmp_func hadamard8_diff[6];
|
me_cmp_func hadamard8_diff[6];
|
||||||
@ -108,9 +105,6 @@ typedef struct DSPContext {
|
|||||||
#define EDGE_WIDTH 16
|
#define EDGE_WIDTH 16
|
||||||
#define EDGE_TOP 1
|
#define EDGE_TOP 1
|
||||||
#define EDGE_BOTTOM 2
|
#define EDGE_BOTTOM 2
|
||||||
|
|
||||||
void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
|
|
||||||
int src_wrap, int width, int height);
|
|
||||||
} DSPContext;
|
} DSPContext;
|
||||||
|
|
||||||
void ff_dsputil_static_init(void);
|
void ff_dsputil_static_init(void);
|
||||||
|
@ -903,8 +903,9 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
|
|||||||
|
|
||||||
/* intra / predictive decision */
|
/* intra / predictive decision */
|
||||||
pix = c->src[0][0];
|
pix = c->src[0][0];
|
||||||
sum = s->dsp.pix_sum(pix, s->linesize);
|
sum = s->mpvencdsp.pix_sum(pix, s->linesize);
|
||||||
varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500;
|
varc = s->mpvencdsp.pix_norm1(pix, s->linesize) -
|
||||||
|
(((unsigned) sum * sum) >> 8) + 500;
|
||||||
|
|
||||||
pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
|
pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
|
||||||
pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
|
pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
|
||||||
|
@ -1010,7 +1010,7 @@ static int get_intra_count(MpegEncContext *s, uint8_t *src,
|
|||||||
int offset = x + y * stride;
|
int offset = x + y * stride;
|
||||||
int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
|
int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
|
||||||
16);
|
16);
|
||||||
int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
|
int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
|
||||||
int sae = get_sae(src + offset, mean, stride);
|
int sae = get_sae(src + offset, mean, stride);
|
||||||
|
|
||||||
acc += sae + 500 < sad;
|
acc += sae + 500 < sad;
|
||||||
@ -1278,15 +1278,21 @@ static int estimate_best_b_count(MpegEncContext *s)
|
|||||||
data[2] += INPLACE_OFFSET;
|
data[2] += INPLACE_OFFSET;
|
||||||
}
|
}
|
||||||
|
|
||||||
s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
|
s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
|
||||||
data[0], pre_input.f->linesize[0],
|
s->tmp_frames[i]->linesize[0],
|
||||||
c->width, c->height);
|
data[0],
|
||||||
s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
|
pre_input.f->linesize[0],
|
||||||
data[1], pre_input.f->linesize[1],
|
c->width, c->height);
|
||||||
c->width >> 1, c->height >> 1);
|
s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
|
||||||
s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
|
s->tmp_frames[i]->linesize[1],
|
||||||
data[2], pre_input.f->linesize[2],
|
data[1],
|
||||||
c->width >> 1, c->height >> 1);
|
pre_input.f->linesize[1],
|
||||||
|
c->width >> 1, c->height >> 1);
|
||||||
|
s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
|
||||||
|
s->tmp_frames[i]->linesize[2],
|
||||||
|
data[2],
|
||||||
|
pre_input.f->linesize[2],
|
||||||
|
c->width >> 1, c->height >> 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2585,9 +2591,10 @@ static int mb_var_thread(AVCodecContext *c, void *arg){
|
|||||||
int yy = mb_y * 16;
|
int yy = mb_y * 16;
|
||||||
uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
|
uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
|
||||||
int varc;
|
int varc;
|
||||||
int sum = s->dsp.pix_sum(pix, s->linesize);
|
int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
|
||||||
|
|
||||||
varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
|
varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
|
||||||
|
(((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
|
||||||
|
|
||||||
s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
|
s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
|
||||||
s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
|
s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
|
||||||
|
@ -22,7 +22,10 @@
|
|||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "libavutil/avassert.h"
|
#include "libavutil/avassert.h"
|
||||||
#include "libavutil/attributes.h"
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/imgutils.h"
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
|
#include "dsputil.h"
|
||||||
|
#include "imgconvert.h"
|
||||||
#include "mpegvideoencdsp.h"
|
#include "mpegvideoencdsp.h"
|
||||||
|
|
||||||
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
|
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
|
||||||
@ -54,12 +57,92 @@ static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
|
|||||||
(BASIS_SHIFT - RECON_SHIFT);
|
(BASIS_SHIFT - RECON_SHIFT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int pix_sum_c(uint8_t *pix, int line_size)
|
||||||
|
{
|
||||||
|
int s = 0, i, j;
|
||||||
|
|
||||||
|
for (i = 0; i < 16; i++) {
|
||||||
|
for (j = 0; j < 16; j += 8) {
|
||||||
|
s += pix[0];
|
||||||
|
s += pix[1];
|
||||||
|
s += pix[2];
|
||||||
|
s += pix[3];
|
||||||
|
s += pix[4];
|
||||||
|
s += pix[5];
|
||||||
|
s += pix[6];
|
||||||
|
s += pix[7];
|
||||||
|
pix += 8;
|
||||||
|
}
|
||||||
|
pix += line_size - 16;
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int pix_norm1_c(uint8_t *pix, int line_size)
|
||||||
|
{
|
||||||
|
int s = 0, i, j;
|
||||||
|
uint32_t *sq = ff_square_tab + 256;
|
||||||
|
|
||||||
|
for (i = 0; i < 16; i++) {
|
||||||
|
for (j = 0; j < 16; j += 8) {
|
||||||
|
#if 0
|
||||||
|
s += sq[pix[0]];
|
||||||
|
s += sq[pix[1]];
|
||||||
|
s += sq[pix[2]];
|
||||||
|
s += sq[pix[3]];
|
||||||
|
s += sq[pix[4]];
|
||||||
|
s += sq[pix[5]];
|
||||||
|
s += sq[pix[6]];
|
||||||
|
s += sq[pix[7]];
|
||||||
|
#else
|
||||||
|
#if HAVE_FAST_64BIT
|
||||||
|
register uint64_t x = *(uint64_t *) pix;
|
||||||
|
s += sq[x & 0xff];
|
||||||
|
s += sq[(x >> 8) & 0xff];
|
||||||
|
s += sq[(x >> 16) & 0xff];
|
||||||
|
s += sq[(x >> 24) & 0xff];
|
||||||
|
s += sq[(x >> 32) & 0xff];
|
||||||
|
s += sq[(x >> 40) & 0xff];
|
||||||
|
s += sq[(x >> 48) & 0xff];
|
||||||
|
s += sq[(x >> 56) & 0xff];
|
||||||
|
#else
|
||||||
|
register uint32_t x = *(uint32_t *) pix;
|
||||||
|
s += sq[x & 0xff];
|
||||||
|
s += sq[(x >> 8) & 0xff];
|
||||||
|
s += sq[(x >> 16) & 0xff];
|
||||||
|
s += sq[(x >> 24) & 0xff];
|
||||||
|
x = *(uint32_t *) (pix + 4);
|
||||||
|
s += sq[x & 0xff];
|
||||||
|
s += sq[(x >> 8) & 0xff];
|
||||||
|
s += sq[(x >> 16) & 0xff];
|
||||||
|
s += sq[(x >> 24) & 0xff];
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
pix += 8;
|
||||||
|
}
|
||||||
|
pix += line_size - 16;
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
|
av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
|
||||||
AVCodecContext *avctx)
|
AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
c->try_8x8basis = try_8x8basis_c;
|
c->try_8x8basis = try_8x8basis_c;
|
||||||
c->add_8x8basis = add_8x8basis_c;
|
c->add_8x8basis = add_8x8basis_c;
|
||||||
|
|
||||||
|
c->shrink[0] = av_image_copy_plane;
|
||||||
|
c->shrink[1] = ff_shrink22;
|
||||||
|
c->shrink[2] = ff_shrink44;
|
||||||
|
c->shrink[3] = ff_shrink88;
|
||||||
|
|
||||||
|
c->pix_sum = pix_sum_c;
|
||||||
|
c->pix_norm1 = pix_norm1_c;
|
||||||
|
|
||||||
|
if (ARCH_ARM)
|
||||||
|
ff_mpegvideoencdsp_init_arm(c, avctx);
|
||||||
|
if (ARCH_PPC)
|
||||||
|
ff_mpegvideoencdsp_init_ppc(c, avctx);
|
||||||
if (ARCH_X86)
|
if (ARCH_X86)
|
||||||
ff_mpegvideoencdsp_init_x86(c, avctx);
|
ff_mpegvideoencdsp_init_x86(c, avctx);
|
||||||
}
|
}
|
||||||
|
@ -31,10 +31,19 @@ typedef struct MpegvideoEncDSPContext {
|
|||||||
int16_t basis[64], int scale);
|
int16_t basis[64], int scale);
|
||||||
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
|
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
|
||||||
|
|
||||||
|
int (*pix_sum)(uint8_t *pix, int line_size);
|
||||||
|
int (*pix_norm1)(uint8_t *pix, int line_size);
|
||||||
|
|
||||||
|
void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
|
||||||
|
int src_wrap, int width, int height);
|
||||||
} MpegvideoEncDSPContext;
|
} MpegvideoEncDSPContext;
|
||||||
|
|
||||||
void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
|
void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
|
||||||
AVCodecContext *avctx);
|
AVCodecContext *avctx);
|
||||||
|
void ff_mpegvideoencdsp_init_arm(MpegvideoEncDSPContext *c,
|
||||||
|
AVCodecContext *avctx);
|
||||||
|
void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c,
|
||||||
|
AVCodecContext *avctx);
|
||||||
void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
|
void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
|
||||||
AVCodecContext *avctx);
|
AVCodecContext *avctx);
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@ OBJS-$(CONFIG_IDCTDSP) += ppc/idctdsp.o
|
|||||||
OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o
|
OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o
|
||||||
OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \
|
OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \
|
||||||
ppc/mpegvideodsp.o
|
ppc/mpegvideodsp.o
|
||||||
|
OBJS-$(CONFIG_MPEGVIDEOENC) += ppc/mpegvideoencdsp.o
|
||||||
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
|
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
|
||||||
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
|
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
|
||||||
|
|
||||||
|
@ -308,34 +308,6 @@ static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int pix_norm1_altivec(uint8_t *pix, int line_size)
|
|
||||||
{
|
|
||||||
int i, s = 0;
|
|
||||||
const vector unsigned int zero =
|
|
||||||
(const vector unsigned int) vec_splat_u32(0);
|
|
||||||
vector unsigned char perm = vec_lvsl(0, pix);
|
|
||||||
vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
|
|
||||||
vector signed int sum;
|
|
||||||
|
|
||||||
for (i = 0; i < 16; i++) {
|
|
||||||
/* Read the potentially unaligned pixels. */
|
|
||||||
vector unsigned char pixl = vec_ld(0, pix);
|
|
||||||
vector unsigned char pixr = vec_ld(15, pix);
|
|
||||||
vector unsigned char pixv = vec_perm(pixl, pixr, perm);
|
|
||||||
|
|
||||||
/* Square the values, and add them to our sum. */
|
|
||||||
sv = vec_msum(pixv, pixv, sv);
|
|
||||||
|
|
||||||
pix += line_size;
|
|
||||||
}
|
|
||||||
/* Sum up the four partial sums, and put the result into s. */
|
|
||||||
sum = vec_sums((vector signed int) sv, (vector signed int) zero);
|
|
||||||
sum = vec_splat(sum, 3);
|
|
||||||
vec_ste(sum, 0, &s);
|
|
||||||
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
|
/* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
|
||||||
* It's the sad8_altivec code above w/ squaring added. */
|
* It's the sad8_altivec code above w/ squaring added. */
|
||||||
static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
||||||
@ -430,35 +402,6 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int pix_sum_altivec(uint8_t *pix, int line_size)
|
|
||||||
{
|
|
||||||
int i, s;
|
|
||||||
const vector unsigned int zero =
|
|
||||||
(const vector unsigned int) vec_splat_u32(0);
|
|
||||||
vector unsigned char perm = vec_lvsl(0, pix);
|
|
||||||
vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
|
|
||||||
vector signed int sumdiffs;
|
|
||||||
|
|
||||||
for (i = 0; i < 16; i++) {
|
|
||||||
/* Read the potentially unaligned 16 pixels into t1. */
|
|
||||||
vector unsigned char pixl = vec_ld(0, pix);
|
|
||||||
vector unsigned char pixr = vec_ld(15, pix);
|
|
||||||
vector unsigned char t1 = vec_perm(pixl, pixr, perm);
|
|
||||||
|
|
||||||
/* Add each 4 pixel group together and put 4 results into sad. */
|
|
||||||
sad = vec_sum4s(t1, sad);
|
|
||||||
|
|
||||||
pix += line_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Sum up the four partial sums, and put the result into s. */
|
|
||||||
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
|
|
||||||
sumdiffs = vec_splat(sumdiffs, 3);
|
|
||||||
vec_ste(sumdiffs, 0, &s);
|
|
||||||
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
|
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
|
||||||
int line_size)
|
int line_size)
|
||||||
{
|
{
|
||||||
@ -911,9 +854,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
|
|||||||
c->sse[0] = sse16_altivec;
|
c->sse[0] = sse16_altivec;
|
||||||
c->sse[1] = sse8_altivec;
|
c->sse[1] = sse8_altivec;
|
||||||
|
|
||||||
c->pix_norm1 = pix_norm1_altivec;
|
|
||||||
c->pix_sum = pix_sum_altivec;
|
|
||||||
|
|
||||||
c->diff_pixels = diff_pixels_altivec;
|
c->diff_pixels = diff_pixels_altivec;
|
||||||
|
|
||||||
if (!high_bit_depth) {
|
if (!high_bit_depth) {
|
||||||
|
103
libavcodec/ppc/mpegvideoencdsp.c
Normal file
103
libavcodec/ppc/mpegvideoencdsp.c
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#if HAVE_ALTIVEC_H
|
||||||
|
#include <altivec.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/ppc/cpu.h"
|
||||||
|
#include "libavutil/ppc/types_altivec.h"
|
||||||
|
#include "libavutil/ppc/util_altivec.h"
|
||||||
|
#include "libavcodec/mpegvideoencdsp.h"
|
||||||
|
|
||||||
|
#if HAVE_ALTIVEC
|
||||||
|
|
||||||
|
static int pix_norm1_altivec(uint8_t *pix, int line_size)
|
||||||
|
{
|
||||||
|
int i, s = 0;
|
||||||
|
const vector unsigned int zero =
|
||||||
|
(const vector unsigned int) vec_splat_u32(0);
|
||||||
|
vector unsigned char perm = vec_lvsl(0, pix);
|
||||||
|
vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
|
||||||
|
vector signed int sum;
|
||||||
|
|
||||||
|
for (i = 0; i < 16; i++) {
|
||||||
|
/* Read the potentially unaligned pixels. */
|
||||||
|
vector unsigned char pixl = vec_ld(0, pix);
|
||||||
|
vector unsigned char pixr = vec_ld(15, pix);
|
||||||
|
vector unsigned char pixv = vec_perm(pixl, pixr, perm);
|
||||||
|
|
||||||
|
/* Square the values, and add them to our sum. */
|
||||||
|
sv = vec_msum(pixv, pixv, sv);
|
||||||
|
|
||||||
|
pix += line_size;
|
||||||
|
}
|
||||||
|
/* Sum up the four partial sums, and put the result into s. */
|
||||||
|
sum = vec_sums((vector signed int) sv, (vector signed int) zero);
|
||||||
|
sum = vec_splat(sum, 3);
|
||||||
|
vec_ste(sum, 0, &s);
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int pix_sum_altivec(uint8_t *pix, int line_size)
|
||||||
|
{
|
||||||
|
int i, s;
|
||||||
|
const vector unsigned int zero =
|
||||||
|
(const vector unsigned int) vec_splat_u32(0);
|
||||||
|
vector unsigned char perm = vec_lvsl(0, pix);
|
||||||
|
vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
|
||||||
|
vector signed int sumdiffs;
|
||||||
|
|
||||||
|
for (i = 0; i < 16; i++) {
|
||||||
|
/* Read the potentially unaligned 16 pixels into t1. */
|
||||||
|
vector unsigned char pixl = vec_ld(0, pix);
|
||||||
|
vector unsigned char pixr = vec_ld(15, pix);
|
||||||
|
vector unsigned char t1 = vec_perm(pixl, pixr, perm);
|
||||||
|
|
||||||
|
/* Add each 4 pixel group together and put 4 results into sad. */
|
||||||
|
sad = vec_sum4s(t1, sad);
|
||||||
|
|
||||||
|
pix += line_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sum up the four partial sums, and put the result into s. */
|
||||||
|
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
|
||||||
|
sumdiffs = vec_splat(sumdiffs, 3);
|
||||||
|
vec_ste(sumdiffs, 0, &s);
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* HAVE_ALTIVEC */
|
||||||
|
|
||||||
|
av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c,
|
||||||
|
AVCodecContext *avctx)
|
||||||
|
{
|
||||||
|
#if HAVE_ALTIVEC
|
||||||
|
if (!PPC_ALTIVEC(av_get_cpu_flags()))
|
||||||
|
return;
|
||||||
|
|
||||||
|
c->pix_norm1 = pix_norm1_altivec;
|
||||||
|
c->pix_sum = pix_sum_altivec;
|
||||||
|
#endif /* HAVE_ALTIVEC */
|
||||||
|
}
|
@ -517,6 +517,7 @@ static av_cold int svq1_encode_init(AVCodecContext *avctx)
|
|||||||
|
|
||||||
ff_dsputil_init(&s->dsp, avctx);
|
ff_dsputil_init(&s->dsp, avctx);
|
||||||
ff_hpeldsp_init(&s->hdsp, avctx->flags);
|
ff_hpeldsp_init(&s->hdsp, avctx->flags);
|
||||||
|
ff_mpegvideoencdsp_init(&s->m.mpvencdsp, avctx);
|
||||||
|
|
||||||
avctx->coded_frame = av_frame_alloc();
|
avctx->coded_frame = av_frame_alloc();
|
||||||
s->current_picture = av_frame_alloc();
|
s->current_picture = av_frame_alloc();
|
||||||
|
@ -109,6 +109,7 @@ YASM-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp.o
|
|||||||
YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o
|
YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o
|
||||||
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o
|
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o
|
||||||
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
|
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
|
||||||
|
YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o
|
||||||
YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \
|
YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \
|
||||||
x86/fpel.o \
|
x86/fpel.o \
|
||||||
x86/qpel.o
|
x86/qpel.o
|
||||||
|
@ -23,10 +23,6 @@
|
|||||||
|
|
||||||
%include "libavutil/x86/x86util.asm"
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
SECTION_RODATA
|
|
||||||
|
|
||||||
cextern pw_1
|
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
%macro DIFF_PIXELS_1 4
|
%macro DIFF_PIXELS_1 4
|
||||||
@ -465,113 +461,6 @@ cglobal diff_pixels, 4, 5, 5
|
|||||||
jne .loop
|
jne .loop
|
||||||
RET
|
RET
|
||||||
|
|
||||||
; int ff_pix_sum16_<cpu>(uint8_t *pix, int line_size)
; Returns the sum of the bytes of a block 16 bytes wide and 16 rows tall
; (%2 iterations, each covering one, two or four rows depending on the
; instruction set).
; %1 = number of xmm registers used
; %2 = number of loop iterations
; %3 = number of GPRs used
; %4 = rows consumed per iteration; the pointer advances by line_size * %4
;      (not used when mmsize == 8, where one row is handled per iteration)
%macro PIX_SUM16 4
cglobal pix_sum16, 2, %3, %1
    movsxdifnidn r1, r1d
    pxor         m4, m4             ; m4 = running sum
    mov          r2, %2             ; r2 = iterations left
%if cpuflag(xop)
    lea          r3, [r1*3]         ; r3 = 3 * line_size, address of 4th row
%else
    pxor         m5, m5             ; m5 = 0, used to widen bytes to words
%endif
.next_rows:
%if cpuflag(xop)
    ; four rows per iteration, one vphaddubq per row
    vphaddubq    m0, [r0]
    vphaddubq    m1, [r0+r1]
    vphaddubq    m2, [r0+r1*2]
    vphaddubq    m3, [r0+r3]
%else
    ; mmsize == 8: both halves of one row; otherwise two full rows
    mova         m0, [r0]
%if mmsize == 8
    mova         m1, [r0+8]
%else
    mova         m1, [r0+r1]
%endif
    punpckhbw    m2, m0, m5
    punpcklbw    m0, m5
    punpckhbw    m3, m1, m5
    punpcklbw    m1, m5
%endif ; cpuflag(xop)
    ; fold m0..m3 into the accumulator
    paddw        m3, m2
    paddw        m1, m0
    paddw        m3, m1
    paddw        m4, m3
%if mmsize == 8
    add          r0, r1
%else
    lea          r0, [r0+r1*%4]
%endif
    dec          r2
    jne .next_rows
%if cpuflag(xop)
    ; add the upper half of the accumulator onto the lower half
    pshufd       m0, m4, q0032
    paddd        m4, m0
%else
    ; HADDW comes from x86util.asm; presumably a horizontal add of the
    ; word lanes, with m5 as its second operand
    HADDW        m4, m5
%endif
    movd        eax, m4
    RET
%endmacro

INIT_MMX mmx
PIX_SUM16 0, 16, 3, 0
INIT_XMM sse2
PIX_SUM16 6, 8,  3, 2
%if HAVE_XOP_EXTERNAL
INIT_XMM xop
PIX_SUM16 5, 4,  4, 4
%endif
|
|
||||||
|
|
||||||
; int ff_pix_norm1_<cpu>(uint8_t *pix, int line_size)
; Returns the sum of the squared bytes of a block 16 bytes wide and
; 16 rows tall (one row per iteration when mmsize == 8, two otherwise).
; %1 = number of xmm registers used
; %2 = number of loop iterations
%macro PIX_NORM1 2
cglobal pix_norm1, 2, 3, %1
    movsxdifnidn r1, r1d
    pxor         m5, m5             ; m5 = running sum of squares (dwords)
    pxor         m0, m0             ; m0 = 0, used to widen bytes to words
    mov          r2, %2             ; r2 = iterations left
.next_rows:
    mova         m2, [r0]
%if mmsize == 8
    mova         m3, [r0+8]         ; second half of the same row
%else
    mova         m3, [r0+r1]        ; the following row
%endif
    ; widen to words: m1/m2 = high/low half of the first load,
    ;                 m4/m3 = high/low half of the second load
    punpckhbw    m1, m2, m0
    punpckhbw    m4, m3, m0
    punpcklbw    m2, m0
    punpcklbw    m3, m0
    ; square every word and add neighbouring products into dwords
    pmaddwd      m1, m1
    pmaddwd      m4, m4
    pmaddwd      m2, m2
    pmaddwd      m3, m3
    ; fold the four dword vectors into the accumulator
    paddd        m4, m3
    paddd        m2, m1
    paddd        m5, m4
    paddd        m5, m2
%if mmsize == 8
    add          r0, r1
%else
    lea          r0, [r0+r1*2]
%endif
    dec          r2
    jne .next_rows
    ; HADDD comes from x86util.asm; presumably a horizontal add of the
    ; dword lanes, with m1 as its second operand
    HADDD        m5, m1
    movd        eax, m5
    RET
%endmacro

INIT_MMX mmx
PIX_NORM1 0, 16
INIT_XMM sse2
PIX_NORM1 6, 8
|
|
||||||
|
|
||||||
;-----------------------------------------------
|
;-----------------------------------------------
|
||||||
;int ff_sum_abs_dctelem(int16_t *block)
|
;int ff_sum_abs_dctelem(int16_t *block)
|
||||||
;-----------------------------------------------
|
;-----------------------------------------------
|
||||||
|
@ -37,11 +37,6 @@ void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
|
|||||||
int stride);
|
int stride);
|
||||||
void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
|
void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
|
||||||
int stride);
|
int stride);
|
||||||
int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
|
|
||||||
int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
|
|
||||||
int ff_pix_sum16_xop(uint8_t *pix, int line_size);
|
|
||||||
int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
|
|
||||||
int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
|
|
||||||
int ff_sum_abs_dctelem_mmx(int16_t *block);
|
int ff_sum_abs_dctelem_mmx(int16_t *block);
|
||||||
int ff_sum_abs_dctelem_mmxext(int16_t *block);
|
int ff_sum_abs_dctelem_mmxext(int16_t *block);
|
||||||
int ff_sum_abs_dctelem_sse2(int16_t *block);
|
int ff_sum_abs_dctelem_sse2(int16_t *block);
|
||||||
@ -364,8 +359,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
|||||||
if (!high_bit_depth)
|
if (!high_bit_depth)
|
||||||
c->get_pixels = ff_get_pixels_mmx;
|
c->get_pixels = ff_get_pixels_mmx;
|
||||||
c->diff_pixels = ff_diff_pixels_mmx;
|
c->diff_pixels = ff_diff_pixels_mmx;
|
||||||
c->pix_sum = ff_pix_sum16_mmx;
|
|
||||||
c->pix_norm1 = ff_pix_norm1_mmx;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(cpu_flags))
|
if (EXTERNAL_SSE2(cpu_flags))
|
||||||
@ -431,8 +424,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
|||||||
c->sse[0] = ff_sse16_sse2;
|
c->sse[0] = ff_sse16_sse2;
|
||||||
c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2;
|
c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2;
|
||||||
c->diff_pixels = ff_diff_pixels_sse2;
|
c->diff_pixels = ff_diff_pixels_sse2;
|
||||||
c->pix_sum = ff_pix_sum16_sse2;
|
|
||||||
c->pix_norm1 = ff_pix_norm1_sse2;
|
|
||||||
|
|
||||||
#if HAVE_ALIGNED_STACK
|
#if HAVE_ALIGNED_STACK
|
||||||
c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
|
c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
|
||||||
@ -448,9 +439,5 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_XOP(cpu_flags)) {
|
|
||||||
c->pix_sum = ff_pix_sum16_xop;
|
|
||||||
}
|
|
||||||
|
|
||||||
ff_dsputil_init_pix_mmx(c, avctx);
|
ff_dsputil_init_pix_mmx(c, avctx);
|
||||||
}
|
}
|
||||||
|
137
libavcodec/x86/mpegvideoencdsp.asm
Normal file
137
libavcodec/x86/mpegvideoencdsp.asm
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
;*****************************************************************************
|
||||||
|
;* SIMD-optimized MPEG encoding functions
|
||||||
|
;*****************************************************************************
|
||||||
|
;* Copyright (c) 2000, 2001 Fabrice Bellard
|
||||||
|
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;*****************************************************************************
|
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
|
SECTION_RODATA
|
||||||
|
|
||||||
|
cextern pw_1
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
; int ff_pix_sum16_<cpu>(uint8_t *pix, int line_size)
; Returns the sum of the bytes of a block 16 bytes wide and 16 rows tall
; (%2 iterations, each covering one, two or four rows depending on the
; instruction set).
; %1 = number of xmm registers used
; %2 = number of loop iterations
; %3 = number of GPRs used
; %4 = rows consumed per iteration; the pointer advances by line_size * %4
;      (not used when mmsize == 8, where one row is handled per iteration)
%macro PIX_SUM16 4
cglobal pix_sum16, 2, %3, %1
    movsxdifnidn r1, r1d
    pxor         m4, m4             ; m4 = running sum
    mov          r2, %2             ; r2 = iterations left
%if cpuflag(xop)
    lea          r3, [r1*3]         ; r3 = 3 * line_size, address of 4th row
%else
    pxor         m5, m5             ; m5 = 0, used to widen bytes to words
%endif
.next_rows:
%if cpuflag(xop)
    ; four rows per iteration, one vphaddubq per row
    vphaddubq    m0, [r0]
    vphaddubq    m1, [r0+r1]
    vphaddubq    m2, [r0+r1*2]
    vphaddubq    m3, [r0+r3]
%else
    ; mmsize == 8: both halves of one row; otherwise two full rows
    mova         m0, [r0]
%if mmsize == 8
    mova         m1, [r0+8]
%else
    mova         m1, [r0+r1]
%endif
    punpckhbw    m2, m0, m5
    punpcklbw    m0, m5
    punpckhbw    m3, m1, m5
    punpcklbw    m1, m5
%endif ; cpuflag(xop)
    ; fold m0..m3 into the accumulator
    paddw        m3, m2
    paddw        m1, m0
    paddw        m3, m1
    paddw        m4, m3
%if mmsize == 8
    add          r0, r1
%else
    lea          r0, [r0+r1*%4]
%endif
    dec          r2
    jne .next_rows
%if cpuflag(xop)
    ; add the upper half of the accumulator onto the lower half
    pshufd       m0, m4, q0032
    paddd        m4, m0
%else
    ; HADDW comes from x86util.asm; presumably a horizontal add of the
    ; word lanes, with m5 as its second operand
    HADDW        m4, m5
%endif
    movd        eax, m4
    RET
%endmacro

INIT_MMX mmx
PIX_SUM16 0, 16, 3, 0
INIT_XMM sse2
PIX_SUM16 6, 8,  3, 2
%if HAVE_XOP_EXTERNAL
INIT_XMM xop
PIX_SUM16 5, 4,  4, 4
%endif
|
||||||
|
|
||||||
|
; int ff_pix_norm1_<cpu>(uint8_t *pix, int line_size)
; Returns the sum of the squared bytes of a block 16 bytes wide and
; 16 rows tall (one row per iteration when mmsize == 8, two otherwise).
; %1 = number of xmm registers used
; %2 = number of loop iterations
%macro PIX_NORM1 2
cglobal pix_norm1, 2, 3, %1
    movsxdifnidn r1, r1d
    pxor         m5, m5             ; m5 = running sum of squares (dwords)
    pxor         m0, m0             ; m0 = 0, used to widen bytes to words
    mov          r2, %2             ; r2 = iterations left
.next_rows:
    mova         m2, [r0]
%if mmsize == 8
    mova         m3, [r0+8]         ; second half of the same row
%else
    mova         m3, [r0+r1]        ; the following row
%endif
    ; widen to words: m1/m2 = high/low half of the first load,
    ;                 m4/m3 = high/low half of the second load
    punpckhbw    m1, m2, m0
    punpckhbw    m4, m3, m0
    punpcklbw    m2, m0
    punpcklbw    m3, m0
    ; square every word and add neighbouring products into dwords
    pmaddwd      m1, m1
    pmaddwd      m4, m4
    pmaddwd      m2, m2
    pmaddwd      m3, m3
    ; fold the four dword vectors into the accumulator
    paddd        m4, m3
    paddd        m2, m1
    paddd        m5, m4
    paddd        m5, m2
%if mmsize == 8
    add          r0, r1
%else
    lea          r0, [r0+r1*2]
%endif
    dec          r2
    jne .next_rows
    ; HADDD comes from x86util.asm; presumably a horizontal add of the
    ; dword lanes, with m1 as its second operand
    HADDD        m5, m1
    movd        eax, m5
    RET
%endmacro

INIT_MMX mmx
PIX_NORM1 0, 16
INIT_XMM sse2
PIX_NORM1 6, 8
|
||||||
|
|
@ -22,6 +22,12 @@
|
|||||||
#include "libavcodec/avcodec.h"
|
#include "libavcodec/avcodec.h"
|
||||||
#include "libavcodec/mpegvideoencdsp.h"
|
#include "libavcodec/mpegvideoencdsp.h"
|
||||||
|
|
||||||
|
int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
|
||||||
|
int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
|
||||||
|
int ff_pix_sum16_xop(uint8_t *pix, int line_size);
|
||||||
|
int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
|
||||||
|
int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
|
||||||
|
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
|
|
||||||
#define PHADDD(a, t) \
|
#define PHADDD(a, t) \
|
||||||
@ -95,9 +101,24 @@
|
|||||||
av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
|
av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
|
||||||
AVCodecContext *avctx)
|
AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
#if HAVE_INLINE_ASM
|
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
|
c->pix_sum = ff_pix_sum16_mmx;
|
||||||
|
c->pix_norm1 = ff_pix_norm1_mmx;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
|
c->pix_sum = ff_pix_sum16_sse2;
|
||||||
|
c->pix_norm1 = ff_pix_norm1_sse2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (EXTERNAL_XOP(cpu_flags)) {
|
||||||
|
c->pix_sum = ff_pix_sum16_xop;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if HAVE_INLINE_ASM
|
||||||
|
|
||||||
if (INLINE_MMX(cpu_flags)) {
|
if (INLINE_MMX(cpu_flags)) {
|
||||||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
||||||
c->try_8x8basis = try_8x8basis_mmx;
|
c->try_8x8basis = try_8x8basis_mmx;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user