x86/vf_noise: move asm code to a separate file
Reviewed-by: Michael Niedermayer <michaelni@gmx.at> Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
f6bb2cd1b0
commit
864f9326fb
@ -29,43 +29,12 @@
|
|||||||
#include "libavutil/lfg.h"
|
#include "libavutil/lfg.h"
|
||||||
#include "libavutil/parseutils.h"
|
#include "libavutil/parseutils.h"
|
||||||
#include "libavutil/pixdesc.h"
|
#include "libavutil/pixdesc.h"
|
||||||
#include "libavutil/x86/asm.h"
|
|
||||||
#include "avfilter.h"
|
#include "avfilter.h"
|
||||||
#include "formats.h"
|
#include "formats.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
#include "vf_noise.h"
|
||||||
#include "video.h"
|
#include "video.h"
|
||||||
|
|
||||||
#define MAX_NOISE 5120
|
|
||||||
#define MAX_SHIFT 1024
|
|
||||||
#define MAX_RES (MAX_NOISE-MAX_SHIFT)
|
|
||||||
|
|
||||||
#define NOISE_UNIFORM 1
|
|
||||||
#define NOISE_TEMPORAL 2
|
|
||||||
#define NOISE_AVERAGED 8
|
|
||||||
#define NOISE_PATTERN 16
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int strength;
|
|
||||||
unsigned flags;
|
|
||||||
AVLFG lfg;
|
|
||||||
int seed;
|
|
||||||
int8_t *noise;
|
|
||||||
int8_t *prev_shift[MAX_RES][3];
|
|
||||||
int rand_shift[MAX_RES];
|
|
||||||
int rand_shift_init;
|
|
||||||
} FilterParams;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
const AVClass *class;
|
|
||||||
int nb_planes;
|
|
||||||
int bytewidth[4];
|
|
||||||
int height[4];
|
|
||||||
FilterParams all;
|
|
||||||
FilterParams param[4];
|
|
||||||
void (*line_noise)(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift);
|
|
||||||
void (*line_noise_avg)(uint8_t *dst, const uint8_t *src, int len, const int8_t * const *shift);
|
|
||||||
} NoiseContext;
|
|
||||||
|
|
||||||
typedef struct ThreadData {
|
typedef struct ThreadData {
|
||||||
AVFrame *in, *out;
|
AVFrame *in, *out;
|
||||||
} ThreadData;
|
} ThreadData;
|
||||||
@ -193,8 +162,8 @@ static int config_input(AVFilterLink *inlink)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *noise,
|
void ff_line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *noise,
|
||||||
int len, int shift)
|
int len, int shift)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -206,70 +175,8 @@ static inline void line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\n\t"
|
void ff_line_noise_avg_c(uint8_t *dst, const uint8_t *src,
|
||||||
|
int len, const int8_t * const *shift)
|
||||||
static void line_noise_mmx(uint8_t *dst, const uint8_t *src,
|
|
||||||
const int8_t *noise, int len, int shift)
|
|
||||||
{
|
|
||||||
#if HAVE_MMX_INLINE
|
|
||||||
x86_reg mmx_len= len&(~7);
|
|
||||||
noise+=shift;
|
|
||||||
|
|
||||||
__asm__ volatile(
|
|
||||||
"mov %3, %%"REG_a" \n\t"
|
|
||||||
"pcmpeqb %%mm7, %%mm7 \n\t"
|
|
||||||
"psllw $15, %%mm7 \n\t"
|
|
||||||
"packsswb %%mm7, %%mm7 \n\t"
|
|
||||||
ASMALIGN(4)
|
|
||||||
"1: \n\t"
|
|
||||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
|
||||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
|
||||||
"pxor %%mm7, %%mm0 \n\t"
|
|
||||||
"paddsb %%mm1, %%mm0 \n\t"
|
|
||||||
"pxor %%mm7, %%mm0 \n\t"
|
|
||||||
"movq %%mm0, (%2, %%"REG_a") \n\t"
|
|
||||||
"add $8, %%"REG_a" \n\t"
|
|
||||||
" js 1b \n\t"
|
|
||||||
:: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
|
|
||||||
: "%"REG_a
|
|
||||||
);
|
|
||||||
if (mmx_len!=len)
|
|
||||||
line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
|
|
||||||
const int8_t *noise, int len, int shift)
|
|
||||||
{
|
|
||||||
#if HAVE_MMXEXT_INLINE
|
|
||||||
x86_reg mmx_len= len&(~7);
|
|
||||||
noise+=shift;
|
|
||||||
|
|
||||||
__asm__ volatile(
|
|
||||||
"mov %3, %%"REG_a" \n\t"
|
|
||||||
"pcmpeqb %%mm7, %%mm7 \n\t"
|
|
||||||
"psllw $15, %%mm7 \n\t"
|
|
||||||
"packsswb %%mm7, %%mm7 \n\t"
|
|
||||||
ASMALIGN(4)
|
|
||||||
"1: \n\t"
|
|
||||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
|
||||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
|
||||||
"pxor %%mm7, %%mm0 \n\t"
|
|
||||||
"paddsb %%mm1, %%mm0 \n\t"
|
|
||||||
"pxor %%mm7, %%mm0 \n\t"
|
|
||||||
"movntq %%mm0, (%2, %%"REG_a") \n\t"
|
|
||||||
"add $8, %%"REG_a" \n\t"
|
|
||||||
" js 1b \n\t"
|
|
||||||
:: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
|
|
||||||
: "%"REG_a
|
|
||||||
);
|
|
||||||
if (mmx_len != len)
|
|
||||||
line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void line_noise_avg_c(uint8_t *dst, const uint8_t *src,
|
|
||||||
int len, const int8_t * const *shift)
|
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
const int8_t *src2 = (const int8_t*)src;
|
const int8_t *src2 = (const int8_t*)src;
|
||||||
@ -280,50 +187,6 @@ static inline void line_noise_avg_c(uint8_t *dst, const uint8_t *src,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
|
|
||||||
int len, const int8_t * const *shift)
|
|
||||||
{
|
|
||||||
#if HAVE_MMX_INLINE && HAVE_6REGS
|
|
||||||
x86_reg mmx_len= len&(~7);
|
|
||||||
|
|
||||||
__asm__ volatile(
|
|
||||||
"mov %5, %%"REG_a" \n\t"
|
|
||||||
ASMALIGN(4)
|
|
||||||
"1: \n\t"
|
|
||||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
|
||||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
|
||||||
"paddb (%2, %%"REG_a"), %%mm1 \n\t"
|
|
||||||
"paddb (%3, %%"REG_a"), %%mm1 \n\t"
|
|
||||||
"movq %%mm0, %%mm2 \n\t"
|
|
||||||
"movq %%mm1, %%mm3 \n\t"
|
|
||||||
"punpcklbw %%mm0, %%mm0 \n\t"
|
|
||||||
"punpckhbw %%mm2, %%mm2 \n\t"
|
|
||||||
"punpcklbw %%mm1, %%mm1 \n\t"
|
|
||||||
"punpckhbw %%mm3, %%mm3 \n\t"
|
|
||||||
"pmulhw %%mm0, %%mm1 \n\t"
|
|
||||||
"pmulhw %%mm2, %%mm3 \n\t"
|
|
||||||
"paddw %%mm1, %%mm1 \n\t"
|
|
||||||
"paddw %%mm3, %%mm3 \n\t"
|
|
||||||
"paddw %%mm0, %%mm1 \n\t"
|
|
||||||
"paddw %%mm2, %%mm3 \n\t"
|
|
||||||
"psrlw $8, %%mm1 \n\t"
|
|
||||||
"psrlw $8, %%mm3 \n\t"
|
|
||||||
"packuswb %%mm3, %%mm1 \n\t"
|
|
||||||
"movq %%mm1, (%4, %%"REG_a") \n\t"
|
|
||||||
"add $8, %%"REG_a" \n\t"
|
|
||||||
" js 1b \n\t"
|
|
||||||
:: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
|
|
||||||
"r" (dst+mmx_len), "g" (-mmx_len)
|
|
||||||
: "%"REG_a
|
|
||||||
);
|
|
||||||
|
|
||||||
if (mmx_len != len){
|
|
||||||
const int8_t *shift2[3]={shift[0]+mmx_len, shift[1]+mmx_len, shift[2]+mmx_len};
|
|
||||||
line_noise_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len, shift2);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static void noise(uint8_t *dst, const uint8_t *src,
|
static void noise(uint8_t *dst, const uint8_t *src,
|
||||||
int dst_linesize, int src_linesize,
|
int dst_linesize, int src_linesize,
|
||||||
int width, int start, int end, NoiseContext *n, int comp)
|
int width, int start, int end, NoiseContext *n, int comp)
|
||||||
@ -421,7 +284,6 @@ static av_cold int init(AVFilterContext *ctx)
|
|||||||
{
|
{
|
||||||
NoiseContext *n = ctx->priv;
|
NoiseContext *n = ctx->priv;
|
||||||
int ret, i;
|
int ret, i;
|
||||||
int cpu_flags = av_get_cpu_flags();
|
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
for (i = 0; i < 4; i++) {
|
||||||
if (n->all.seed >= 0)
|
if (n->all.seed >= 0)
|
||||||
@ -439,19 +301,11 @@ static av_cold int init(AVFilterContext *ctx)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
n->line_noise = line_noise_c;
|
n->line_noise = ff_line_noise_c;
|
||||||
n->line_noise_avg = line_noise_avg_c;
|
n->line_noise_avg = ff_line_noise_avg_c;
|
||||||
|
|
||||||
if (HAVE_MMX_INLINE &&
|
if (ARCH_X86)
|
||||||
cpu_flags & AV_CPU_FLAG_MMX) {
|
ff_noise_init_x86(n);
|
||||||
n->line_noise = line_noise_mmx;
|
|
||||||
#if HAVE_6REGS
|
|
||||||
n->line_noise_avg = line_noise_avg_mmx;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
if (HAVE_MMXEXT_INLINE &&
|
|
||||||
cpu_flags & AV_CPU_FLAG_MMXEXT)
|
|
||||||
n->line_noise = line_noise_mmxext;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
64
libavfilter/vf_noise.h
Normal file
64
libavfilter/vf_noise.h
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
|
||||||
|
* Copyright (c) 2013 Paul B Mahol
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVFILTER_NOISE_H
|
||||||
|
#define AVFILTER_NOISE_H
|
||||||
|
|
||||||
|
#include "libavutil/lfg.h"
|
||||||
|
#include "avfilter.h"
|
||||||
|
|
||||||
|
#define MAX_NOISE 5120
|
||||||
|
#define MAX_SHIFT 1024
|
||||||
|
#define MAX_RES (MAX_NOISE-MAX_SHIFT)
|
||||||
|
|
||||||
|
#define NOISE_UNIFORM 1
|
||||||
|
#define NOISE_TEMPORAL 2
|
||||||
|
#define NOISE_AVERAGED 8
|
||||||
|
#define NOISE_PATTERN 16
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int strength;
|
||||||
|
unsigned flags;
|
||||||
|
AVLFG lfg;
|
||||||
|
int seed;
|
||||||
|
int8_t *noise;
|
||||||
|
int8_t *prev_shift[MAX_RES][3];
|
||||||
|
int rand_shift[MAX_RES];
|
||||||
|
int rand_shift_init;
|
||||||
|
} FilterParams;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const AVClass *class;
|
||||||
|
int nb_planes;
|
||||||
|
int bytewidth[4];
|
||||||
|
int height[4];
|
||||||
|
FilterParams all;
|
||||||
|
FilterParams param[4];
|
||||||
|
void (*line_noise)(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift);
|
||||||
|
void (*line_noise_avg)(uint8_t *dst, const uint8_t *src, int len, const int8_t * const *shift);
|
||||||
|
} NoiseContext;
|
||||||
|
|
||||||
|
void ff_line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift);
|
||||||
|
void ff_line_noise_avg_c(uint8_t *dst, const uint8_t *src, int len, const int8_t * const *shift);
|
||||||
|
|
||||||
|
void ff_noise_init_x86(NoiseContext *n);
|
||||||
|
|
||||||
|
#endif /* AVFILTER_NOISE_H */
|
@ -1,6 +1,7 @@
|
|||||||
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
|
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
|
||||||
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
|
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
|
||||||
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
|
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
|
||||||
|
OBJS-$(CONFIG_NOISE_FILTER) += x86/vf_noise.o
|
||||||
OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o
|
OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o
|
||||||
OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o
|
OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o
|
||||||
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
|
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
|
||||||
|
144
libavfilter/x86/vf_noise.c
Normal file
144
libavfilter/x86/vf_noise.c
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
|
||||||
|
* Copyright (c) 2013 Paul B Mahol
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
#include "libavutil/x86/asm.h"
|
||||||
|
#include "libavfilter/vf_noise.h"
|
||||||
|
|
||||||
|
#if HAVE_INLINE_ASM
|
||||||
|
static void line_noise_mmx(uint8_t *dst, const uint8_t *src,
|
||||||
|
const int8_t *noise, int len, int shift)
|
||||||
|
{
|
||||||
|
x86_reg mmx_len= len & (~7);
|
||||||
|
noise += shift;
|
||||||
|
|
||||||
|
__asm__ volatile(
|
||||||
|
"mov %3, %%"REG_a" \n\t"
|
||||||
|
"pcmpeqb %%mm7, %%mm7 \n\t"
|
||||||
|
"psllw $15, %%mm7 \n\t"
|
||||||
|
"packsswb %%mm7, %%mm7 \n\t"
|
||||||
|
".p2align 4 \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||||
|
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||||
|
"pxor %%mm7, %%mm0 \n\t"
|
||||||
|
"paddsb %%mm1, %%mm0 \n\t"
|
||||||
|
"pxor %%mm7, %%mm0 \n\t"
|
||||||
|
"movq %%mm0, (%2, %%"REG_a") \n\t"
|
||||||
|
"add $8, %%"REG_a" \n\t"
|
||||||
|
" js 1b \n\t"
|
||||||
|
:: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
|
||||||
|
: "%"REG_a
|
||||||
|
);
|
||||||
|
if (mmx_len != len)
|
||||||
|
ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if HAVE_6REGS
|
||||||
|
static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
|
||||||
|
int len, const int8_t * const *shift)
|
||||||
|
{
|
||||||
|
x86_reg mmx_len = len & (~7);
|
||||||
|
|
||||||
|
__asm__ volatile(
|
||||||
|
"mov %5, %%"REG_a" \n\t"
|
||||||
|
".p2align 4 \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||||
|
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||||
|
"paddb (%2, %%"REG_a"), %%mm1 \n\t"
|
||||||
|
"paddb (%3, %%"REG_a"), %%mm1 \n\t"
|
||||||
|
"movq %%mm0, %%mm2 \n\t"
|
||||||
|
"movq %%mm1, %%mm3 \n\t"
|
||||||
|
"punpcklbw %%mm0, %%mm0 \n\t"
|
||||||
|
"punpckhbw %%mm2, %%mm2 \n\t"
|
||||||
|
"punpcklbw %%mm1, %%mm1 \n\t"
|
||||||
|
"punpckhbw %%mm3, %%mm3 \n\t"
|
||||||
|
"pmulhw %%mm0, %%mm1 \n\t"
|
||||||
|
"pmulhw %%mm2, %%mm3 \n\t"
|
||||||
|
"paddw %%mm1, %%mm1 \n\t"
|
||||||
|
"paddw %%mm3, %%mm3 \n\t"
|
||||||
|
"paddw %%mm0, %%mm1 \n\t"
|
||||||
|
"paddw %%mm2, %%mm3 \n\t"
|
||||||
|
"psrlw $8, %%mm1 \n\t"
|
||||||
|
"psrlw $8, %%mm3 \n\t"
|
||||||
|
"packuswb %%mm3, %%mm1 \n\t"
|
||||||
|
"movq %%mm1, (%4, %%"REG_a") \n\t"
|
||||||
|
"add $8, %%"REG_a" \n\t"
|
||||||
|
" js 1b \n\t"
|
||||||
|
:: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
|
||||||
|
"r" (dst+mmx_len), "g" (-mmx_len)
|
||||||
|
: "%"REG_a
|
||||||
|
);
|
||||||
|
|
||||||
|
if (mmx_len != len){
|
||||||
|
const int8_t *shift2[3] = { shift[0]+mmx_len, shift[1]+mmx_len, shift[2]+mmx_len };
|
||||||
|
ff_line_noise_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len, shift2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* HAVE_6REGS */
|
||||||
|
|
||||||
|
static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
|
||||||
|
const int8_t *noise, int len, int shift)
|
||||||
|
{
|
||||||
|
x86_reg mmx_len = len & (~7);
|
||||||
|
noise += shift;
|
||||||
|
|
||||||
|
__asm__ volatile(
|
||||||
|
"mov %3, %%"REG_a" \n\t"
|
||||||
|
"pcmpeqb %%mm7, %%mm7 \n\t"
|
||||||
|
"psllw $15, %%mm7 \n\t"
|
||||||
|
"packsswb %%mm7, %%mm7 \n\t"
|
||||||
|
".p2align 4 \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||||
|
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||||
|
"pxor %%mm7, %%mm0 \n\t"
|
||||||
|
"paddsb %%mm1, %%mm0 \n\t"
|
||||||
|
"pxor %%mm7, %%mm0 \n\t"
|
||||||
|
"movntq %%mm0, (%2, %%"REG_a") \n\t"
|
||||||
|
"add $8, %%"REG_a" \n\t"
|
||||||
|
" js 1b \n\t"
|
||||||
|
:: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
|
||||||
|
: "%"REG_a
|
||||||
|
);
|
||||||
|
if (mmx_len != len)
|
||||||
|
ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
|
||||||
|
}
|
||||||
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
|
/**
 * Install the x86 inline-asm row kernels into a noise context.
 *
 * Does nothing when inline asm is unavailable. Otherwise the MMX kernels
 * are selected when the CPU reports MMX, and the MMXEXT line_noise kernel
 * takes precedence over the MMX one when both are available.
 *
 * @param n context whose line_noise / line_noise_avg pointers are updated
 */
av_cold void ff_noise_init_x86(NoiseContext *n)
{
#if HAVE_INLINE_ASM
    const int flags      = av_get_cpu_flags();
    const int has_mmx    = INLINE_MMX(flags);
    const int has_mmxext = INLINE_MMXEXT(flags);

    if (has_mmx)
        n->line_noise = line_noise_mmx;
    /* checked after MMX so that the MMXEXT kernel wins when both exist */
    if (has_mmxext)
        n->line_noise = line_noise_mmxext;

#if HAVE_6REGS
    /* the averaging kernel is only compiled when six registers are free */
    if (has_mmx)
        n->line_noise_avg = line_noise_avg_mmx;
#endif
#endif
}
|
Loading…
x
Reference in New Issue
Block a user