Add gradfun filter, ported from MPlayer.
Patch by Nolan L nol888 <=> gmail >=< com. See thread: Subject: [FFmpeg-devel] [PATCH] Port gradfun to libavfilter (GCI) Date: Mon, 29 Nov 2010 07:18:14 -0500 Originally committed as revision 25942 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
9d845ca40c
commit
d5f187fd33
@ -64,6 +64,7 @@ version <next>:
|
||||
- hqdn3d filter added
|
||||
- RTP depacketization of QCELP
|
||||
- FLAC parser added
|
||||
- gradfun filter added
|
||||
|
||||
|
||||
version 0.6:
|
||||
|
@ -425,6 +425,35 @@ frei0r=perspective:0.2/0.2:0.8/0.2
|
||||
For more information see:
|
||||
@url{http://piksel.org/frei0r}
|
||||
|
||||
@section gradfun
|
||||
|
||||
Fix the banding artifacts that are sometimes introduced into nearly flat
|
||||
regions by truncation to 8bit colordepth.
|
||||
Interpolate the gradients that should go where the bands are, and
|
||||
dither them.
|
||||
|
||||
The filter takes two optional parameters, separated by ':':
|
||||
@var{strength}:@var{radius}
|
||||
|
||||
@var{strength} is the maximum amount by which the filter will change
|
||||
any one pixel. Also the threshold for detecting nearly flat
|
||||
regions. Acceptable values range from .51 to 255, default value is
|
||||
1.2, out-of-range values will be clipped to the valid range.
|
||||
|
||||
@var{radius} is the neighborhood to fit the gradient to. A larger
|
||||
radius makes for smoother gradients, but also prevents the filter from
|
||||
modifying the pixels near detailed regions. Acceptable values are
|
||||
8-32, default value is 16, out-of-range values will be clipped to the
|
||||
valid range.
|
||||
|
||||
@example
|
||||
# default parameters
|
||||
gradfun=1.2:16
|
||||
|
||||
# omitting radius
|
||||
gradfun=1.2
|
||||
@end example
|
||||
|
||||
@section hflip
|
||||
|
||||
Flip the input video horizontally.
|
||||
|
@ -26,6 +26,7 @@ OBJS-$(CONFIG_DRAWBOX_FILTER) += vf_drawbox.o
|
||||
OBJS-$(CONFIG_FIFO_FILTER) += vf_fifo.o
|
||||
OBJS-$(CONFIG_FORMAT_FILTER) += vf_format.o
|
||||
OBJS-$(CONFIG_FREI0R_FILTER) += vf_frei0r.o
|
||||
OBJS-$(CONFIG_GRADFUN_FILTER) += vf_gradfun.o
|
||||
OBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o
|
||||
OBJS-$(CONFIG_HQDN3D_FILTER) += vf_hqdn3d.o
|
||||
OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o
|
||||
|
@ -47,6 +47,7 @@ void avfilter_register_all(void)
|
||||
REGISTER_FILTER (FIFO, fifo, vf);
|
||||
REGISTER_FILTER (FORMAT, format, vf);
|
||||
REGISTER_FILTER (FREI0R, frei0r, vf);
|
||||
REGISTER_FILTER (GRADFUN, gradfun, vf);
|
||||
REGISTER_FILTER (HFLIP, hflip, vf);
|
||||
REGISTER_FILTER (HQDN3D, hqdn3d, vf);
|
||||
REGISTER_FILTER (NOFORMAT, noformat, vf);
|
||||
|
@ -27,8 +27,8 @@
|
||||
#include "libavcore/samplefmt.h"
|
||||
|
||||
#define LIBAVFILTER_VERSION_MAJOR 1
|
||||
#define LIBAVFILTER_VERSION_MINOR 68
|
||||
#define LIBAVFILTER_VERSION_MICRO 1
|
||||
#define LIBAVFILTER_VERSION_MINOR 69
|
||||
#define LIBAVFILTER_VERSION_MICRO 0
|
||||
|
||||
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
|
||||
LIBAVFILTER_VERSION_MINOR, \
|
||||
|
48
libavfilter/gradfun.h
Normal file
48
libavfilter/gradfun.h
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Nolan Lum <nol888@gmail.com>
|
||||
* Copyright (c) 2009 Loren Merritt <lorenm@u.washignton.edu>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVFILTER_GRADFUN_H
|
||||
#define AVFILTER_GRADFUN_H
|
||||
|
||||
#include "avfilter.h"
|
||||
|
||||
/// Holds instance-specific information for gradfun.
|
||||
typedef struct {
|
||||
int thresh; ///< threshold for gradient algorithm
|
||||
int radius; ///< blur radius
|
||||
int chroma_w; ///< width of the chroma planes
|
||||
int chroma_h; ///< weight of the chroma planes
|
||||
int chroma_r; ///< blur radius for the chroma planes
|
||||
uint16_t *buf; ///< holds image data for blur algorithm passed into filter.
|
||||
/// DSP functions.
|
||||
void (*filter_line) (uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers);
|
||||
void (*blur_line) (uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width);
|
||||
} GradFunContext;
|
||||
|
||||
void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers);
|
||||
void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width);
|
||||
|
||||
void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers);
|
||||
void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers);
|
||||
|
||||
void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width);
|
||||
|
||||
#endif /* AVFILTER_GRADFUN_H */
|
253
libavfilter/vf_gradfun.c
Normal file
253
libavfilter/vf_gradfun.c
Normal file
@ -0,0 +1,253 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Nolan Lum <nol888@gmail.com>
|
||||
* Copyright (c) 2009 Loren Merritt <lorenm@u.washignton.edu>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* gradfun debanding filter, ported from MPlayer
|
||||
* libmpcodecs/vf_gradfun.c
|
||||
*
|
||||
* Apply a boxblur debanding algorithm (based on the gradfun2db
|
||||
* Avisynth filter by prunedtree).
|
||||
* Foreach pixel, if it's within threshold of the blurred value, make it closer.
|
||||
* So now we have a smoothed and higher bitdepth version of all the shallow
|
||||
* gradients, while leaving detailed areas untouched.
|
||||
* Dither it back to 8bit.
|
||||
*/
|
||||
|
||||
#include "libavcore/imgutils.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/pixdesc.h"
|
||||
#include "avfilter.h"
|
||||
#include "gradfun.h"
|
||||
|
||||
DECLARE_ALIGNED(16, static const uint16_t, dither)[8][8] = {
|
||||
{0x00,0x60,0x18,0x78,0x06,0x66,0x1E,0x7E},
|
||||
{0x40,0x20,0x58,0x38,0x46,0x26,0x5E,0x3E},
|
||||
{0x10,0x70,0x08,0x68,0x16,0x76,0x0E,0x6E},
|
||||
{0x50,0x30,0x48,0x28,0x56,0x36,0x4E,0x2E},
|
||||
{0x04,0x64,0x1C,0x7C,0x02,0x62,0x1A,0x7A},
|
||||
{0x44,0x24,0x5C,0x3C,0x42,0x22,0x5A,0x3A},
|
||||
{0x14,0x74,0x0C,0x6C,0x12,0x72,0x0A,0x6A},
|
||||
{0x54,0x34,0x4C,0x2C,0x52,0x32,0x4A,0x2A},
|
||||
};
|
||||
|
||||
void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers)
|
||||
{
|
||||
int x;
|
||||
for (x = 0; x < width; x++, dc += x & 1) {
|
||||
int pix = src[x] << 7;
|
||||
int delta = dc[0] - pix;
|
||||
int m = abs(delta) * thresh >> 16;
|
||||
m = FFMAX(0, 127 - m);
|
||||
m = m * m * delta >> 14;
|
||||
pix += m + dithers[x & 7];
|
||||
dst[x] = av_clip_uint8(pix >> 7);
|
||||
}
|
||||
}
|
||||
|
||||
void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width)
|
||||
{
|
||||
int x, v, old;
|
||||
for (x = 0; x < width; x++) {
|
||||
v = buf1[x] + src[2 * x] + src[2 * x + 1] + src[2 * x + src_linesize] + src[2 * x + 1 + src_linesize];
|
||||
old = buf[x];
|
||||
buf[x] = v;
|
||||
dc[x] = v - old;
|
||||
}
|
||||
}
|
||||
|
||||
static void filter(GradFunContext *ctx, uint8_t *dst, uint8_t *src, int width, int height, int dst_linesize, int src_linesize, int r)
|
||||
{
|
||||
int bstride = FFALIGN(width, 16) / 2;
|
||||
int y;
|
||||
uint32_t dc_factor = (1 << 21) / (r * r);
|
||||
uint16_t *dc = ctx->buf + 16;
|
||||
uint16_t *buf = ctx->buf + bstride + 32;
|
||||
int thresh = ctx->thresh;
|
||||
|
||||
memset(dc, 0, (bstride + 16) * sizeof(*buf));
|
||||
for (y = 0; y < r; y++)
|
||||
ctx->blur_line(dc, buf + y * bstride, buf + (y - 1) * bstride, src + 2 * y * src_linesize, src_linesize, width / 2);
|
||||
for (;;) {
|
||||
if (y < height - r) {
|
||||
int mod = ((y + r) / 2) % r;
|
||||
uint16_t *buf0 = buf + mod * bstride;
|
||||
uint16_t *buf1 = buf + (mod ? mod - 1 : r - 1) * bstride;
|
||||
int x, v;
|
||||
ctx->blur_line(dc, buf0, buf1, src + (y + r) * src_linesize, src_linesize, width / 2);
|
||||
for (x = v = 0; x < r; x++)
|
||||
v += dc[x];
|
||||
for (; x < width / 2; x++) {
|
||||
v += dc[x] - dc[x-r];
|
||||
dc[x-r] = v * dc_factor >> 16;
|
||||
}
|
||||
for (; x < (width + r + 1) / 2; x++)
|
||||
dc[x-r] = v * dc_factor >> 16;
|
||||
for (x = -r / 2; x < 0; x++)
|
||||
dc[x] = dc[0];
|
||||
}
|
||||
if (y == r) {
|
||||
for (y = 0; y < r; y++)
|
||||
ctx->filter_line(dst + y * dst_linesize, src + y * src_linesize, dc - r / 2, width, thresh, dither[y & 7]);
|
||||
}
|
||||
ctx->filter_line(dst + y * dst_linesize, src + y * src_linesize, dc - r / 2, width, thresh, dither[y & 7]);
|
||||
if (++y >= height) break;
|
||||
ctx->filter_line(dst + y * dst_linesize, src + y * src_linesize, dc - r / 2, width, thresh, dither[y & 7]);
|
||||
if (++y >= height) break;
|
||||
}
|
||||
}
|
||||
|
||||
static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
|
||||
{
|
||||
GradFunContext *gf = ctx->priv;
|
||||
float thresh = 1.2;
|
||||
int radius = 16;
|
||||
av_unused int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (args)
|
||||
sscanf(args, "%f:%d", &thresh, &radius);
|
||||
|
||||
thresh = av_clipf(thresh, 0.51, 255);
|
||||
gf->thresh = (1 << 15) / thresh;
|
||||
gf->radius = av_clip((radius + 1) & ~1, 4, 32);
|
||||
|
||||
gf->blur_line = ff_gradfun_blur_line_c;
|
||||
gf->filter_line = ff_gradfun_filter_line_c;
|
||||
|
||||
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX2)
|
||||
gf->filter_line = ff_gradfun_filter_line_mmx2;
|
||||
if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3)
|
||||
gf->filter_line = ff_gradfun_filter_line_ssse3;
|
||||
if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
|
||||
gf->blur_line = ff_gradfun_blur_line_sse2;
|
||||
|
||||
av_log(ctx, AV_LOG_INFO, "threshold:%.2f radius:%d\n", thresh, gf->radius);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static av_cold void uninit(AVFilterContext *ctx)
|
||||
{
|
||||
GradFunContext *gf = ctx->priv;
|
||||
av_freep(&gf->buf);
|
||||
}
|
||||
|
||||
static int query_formats(AVFilterContext *ctx)
|
||||
{
|
||||
static const enum PixelFormat pix_fmts[] = {
|
||||
PIX_FMT_YUV410P, PIX_FMT_YUV420P,
|
||||
PIX_FMT_GRAY8, PIX_FMT_NV12,
|
||||
PIX_FMT_NV21, PIX_FMT_YUV444P,
|
||||
PIX_FMT_YUV422P, PIX_FMT_YUV411P,
|
||||
PIX_FMT_NONE
|
||||
};
|
||||
|
||||
avfilter_set_common_formats(ctx, avfilter_make_format_list(pix_fmts));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int config_input(AVFilterLink *inlink)
|
||||
{
|
||||
GradFunContext *gf = inlink->dst->priv;
|
||||
int hsub = av_pix_fmt_descriptors[inlink->format].log2_chroma_w;
|
||||
int vsub = av_pix_fmt_descriptors[inlink->format].log2_chroma_h;
|
||||
|
||||
gf->buf = av_mallocz((FFALIGN(inlink->w, 16) * (gf->radius + 1) / 2 + 32) * sizeof(uint16_t));
|
||||
if (!gf->buf)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
gf->chroma_w = -((-inlink->w) >> hsub);
|
||||
gf->chroma_h = -((-inlink->h) >> vsub);
|
||||
gf->chroma_r = av_clip(((((gf->radius >> hsub) + (gf->radius >> vsub)) / 2 ) + 1) & ~1, 4, 32);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *inpicref)
|
||||
{
|
||||
AVFilterLink *outlink = inlink->dst->outputs[0];
|
||||
AVFilterBufferRef *outpicref;
|
||||
|
||||
if (inpicref->perms & AV_PERM_PRESERVE) {
|
||||
outpicref = avfilter_get_video_buffer(outlink, AV_PERM_WRITE, outlink->w, outlink->h);
|
||||
avfilter_copy_buffer_ref_props(outpicref, inpicref);
|
||||
outpicref->video->w = outlink->w;
|
||||
outpicref->video->h = outlink->h;
|
||||
} else
|
||||
outpicref = inpicref;
|
||||
|
||||
outlink->out_buf = outpicref;
|
||||
avfilter_start_frame(outlink, avfilter_ref_buffer(outpicref, ~0));
|
||||
}
|
||||
|
||||
static void null_draw_slice(AVFilterLink *link, int y, int h, int slice_dir) { }
|
||||
|
||||
static void end_frame(AVFilterLink *inlink)
|
||||
{
|
||||
GradFunContext *gf = inlink->dst->priv;
|
||||
AVFilterBufferRef *inpic = inlink->cur_buf;
|
||||
AVFilterLink *outlink = inlink->dst->outputs[0];
|
||||
AVFilterBufferRef *outpic = outlink->out_buf;
|
||||
int p;
|
||||
|
||||
for (p = 0; p < 4 && inpic->data[p]; p++) {
|
||||
int w = inlink->w;
|
||||
int h = inlink->h;
|
||||
int r = gf->radius;
|
||||
if (p) {
|
||||
w = gf->chroma_w;
|
||||
h = gf->chroma_h;
|
||||
r = gf->chroma_r;
|
||||
}
|
||||
|
||||
if (FFMIN(w, h) > 2 * r)
|
||||
filter(gf, outpic->data[p], inpic->data[p], w, h, outpic->linesize[p], inpic->linesize[p], r);
|
||||
else if (outpic->data[p] != inpic->data[p])
|
||||
av_image_copy_plane(outpic->data[p], outpic->linesize[p], inpic->data[p], inpic->linesize[p], w, h);
|
||||
}
|
||||
|
||||
avfilter_draw_slice(outlink, 0, inlink->h, 1);
|
||||
avfilter_end_frame(outlink);
|
||||
avfilter_unref_buffer(inpic);
|
||||
avfilter_unref_buffer(outpic);
|
||||
}
|
||||
|
||||
AVFilter avfilter_vf_gradfun = {
|
||||
.name = "gradfun",
|
||||
.description = NULL_IF_CONFIG_SMALL("Debands video quickly using gradients."),
|
||||
.priv_size = sizeof(GradFunContext),
|
||||
.init = init,
|
||||
.uninit = uninit,
|
||||
.query_formats = query_formats,
|
||||
|
||||
.inputs = (AVFilterPad[]) {{ .name = "default",
|
||||
.type = AVMEDIA_TYPE_VIDEO,
|
||||
.config_props = config_input,
|
||||
.start_frame = start_frame,
|
||||
.draw_slice = null_draw_slice,
|
||||
.end_frame = end_frame,
|
||||
.min_perms = AV_PERM_READ, },
|
||||
{ .name = NULL}},
|
||||
.outputs = (AVFilterPad[]) {{ .name = "default",
|
||||
.type = AVMEDIA_TYPE_VIDEO, },
|
||||
{ .name = NULL}},
|
||||
};
|
@ -1 +1,2 @@
|
||||
MMX-OBJS-$(CONFIG_YADIF_FILTER) += x86/yadif.o
|
||||
MMX-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/gradfun.o
|
||||
|
162
libavfilter/x86/gradfun.c
Normal file
162
libavfilter/x86/gradfun.c
Normal file
@ -0,0 +1,162 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/x86_cpu.h"
|
||||
#include "libavfilter/gradfun.h"
|
||||
|
||||
DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
|
||||
DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
|
||||
|
||||
void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers)
|
||||
{
|
||||
#if HAVE_MMX
|
||||
intptr_t x;
|
||||
if (width & 3) {
|
||||
x = width & ~3;
|
||||
ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, width - x, thresh, dithers);
|
||||
width = x;
|
||||
}
|
||||
x = -width;
|
||||
__asm__ volatile(
|
||||
"movd %4, %%mm5 \n"
|
||||
"pxor %%mm7, %%mm7 \n"
|
||||
"pshufw $0, %%mm5, %%mm5 \n"
|
||||
"movq %6, %%mm6 \n"
|
||||
"movq %5, %%mm4 \n"
|
||||
"1: \n"
|
||||
"movd (%2,%0), %%mm0 \n"
|
||||
"movd (%3,%0), %%mm1 \n"
|
||||
"punpcklbw %%mm7, %%mm0 \n"
|
||||
"punpcklwd %%mm1, %%mm1 \n"
|
||||
"psllw $7, %%mm0 \n"
|
||||
"pxor %%mm2, %%mm2 \n"
|
||||
"psubw %%mm0, %%mm1 \n" // delta = dc - pix
|
||||
"psubw %%mm1, %%mm2 \n"
|
||||
"pmaxsw %%mm1, %%mm2 \n"
|
||||
"pmulhuw %%mm5, %%mm2 \n" // m = abs(delta) * thresh >> 16
|
||||
"psubw %%mm6, %%mm2 \n"
|
||||
"pminsw %%mm7, %%mm2 \n" // m = -max(0, 127-m)
|
||||
"pmullw %%mm2, %%mm2 \n"
|
||||
"paddw %%mm4, %%mm0 \n" // pix += dither
|
||||
"pmulhw %%mm2, %%mm1 \n"
|
||||
"psllw $2, %%mm1 \n" // m = m*m*delta >> 14
|
||||
"paddw %%mm1, %%mm0 \n" // pix += m
|
||||
"psraw $7, %%mm0 \n"
|
||||
"packuswb %%mm0, %%mm0 \n"
|
||||
"movd %%mm0, (%1,%0) \n" // dst = clip(pix>>7)
|
||||
"add $4, %0 \n"
|
||||
"jl 1b \n"
|
||||
"emms \n"
|
||||
:"+r"(x)
|
||||
:"r"(dst+width), "r"(src+width), "r"(dc+width/2),
|
||||
"rm"(thresh), "m"(*dithers), "m"(*pw_7f)
|
||||
:"memory"
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers)
|
||||
{
|
||||
#if HAVE_SSSE3
|
||||
intptr_t x;
|
||||
if (width & 7) {
|
||||
// could be 10% faster if I somehow eliminated this
|
||||
x = width & ~7;
|
||||
ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, width - x, thresh, dithers);
|
||||
width = x;
|
||||
}
|
||||
x = -width;
|
||||
__asm__ volatile(
|
||||
"movd %4, %%xmm5 \n"
|
||||
"pxor %%xmm7, %%xmm7 \n"
|
||||
"pshuflw $0,%%xmm5, %%xmm5 \n"
|
||||
"movdqa %6, %%xmm6 \n"
|
||||
"punpcklqdq %%xmm5, %%xmm5 \n"
|
||||
"movdqa %5, %%xmm4 \n"
|
||||
"1: \n"
|
||||
"movq (%2,%0), %%xmm0 \n"
|
||||
"movq (%3,%0), %%xmm1 \n"
|
||||
"punpcklbw %%xmm7, %%xmm0 \n"
|
||||
"punpcklwd %%xmm1, %%xmm1 \n"
|
||||
"psllw $7, %%xmm0 \n"
|
||||
"psubw %%xmm0, %%xmm1 \n" // delta = dc - pix
|
||||
"pabsw %%xmm1, %%xmm2 \n"
|
||||
"pmulhuw %%xmm5, %%xmm2 \n" // m = abs(delta) * thresh >> 16
|
||||
"psubw %%xmm6, %%xmm2 \n"
|
||||
"pminsw %%xmm7, %%xmm2 \n" // m = -max(0, 127-m)
|
||||
"pmullw %%xmm2, %%xmm2 \n"
|
||||
"psllw $1, %%xmm2 \n"
|
||||
"paddw %%xmm4, %%xmm0 \n" // pix += dither
|
||||
"pmulhrsw %%xmm2, %%xmm1 \n" // m = m*m*delta >> 14
|
||||
"paddw %%xmm1, %%xmm0 \n" // pix += m
|
||||
"psraw $7, %%xmm0 \n"
|
||||
"packuswb %%xmm0, %%xmm0 \n"
|
||||
"movq %%xmm0, (%1,%0) \n" // dst = clip(pix>>7)
|
||||
"add $8, %0 \n"
|
||||
"jl 1b \n"
|
||||
:"+&r"(x)
|
||||
:"r"(dst+width), "r"(src+width), "r"(dc+width/2),
|
||||
"rm"(thresh), "m"(*dithers), "m"(*pw_7f)
|
||||
:"memory"
|
||||
);
|
||||
#endif // HAVE_SSSE3
|
||||
}
|
||||
|
||||
void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width)
|
||||
{
|
||||
#if HAVE_SSE
|
||||
#define BLURV(load)\
|
||||
intptr_t x = -2*width;\
|
||||
__asm__ volatile(\
|
||||
"movdqa %6, %%xmm7 \n"\
|
||||
"1: \n"\
|
||||
load" (%4,%0), %%xmm0 \n"\
|
||||
load" (%5,%0), %%xmm1 \n"\
|
||||
"movdqa %%xmm0, %%xmm2 \n"\
|
||||
"movdqa %%xmm1, %%xmm3 \n"\
|
||||
"psrlw $8, %%xmm0 \n"\
|
||||
"psrlw $8, %%xmm1 \n"\
|
||||
"pand %%xmm7, %%xmm2 \n"\
|
||||
"pand %%xmm7, %%xmm3 \n"\
|
||||
"paddw %%xmm1, %%xmm0 \n"\
|
||||
"paddw %%xmm3, %%xmm2 \n"\
|
||||
"paddw %%xmm2, %%xmm0 \n"\
|
||||
"paddw (%2,%0), %%xmm0 \n"\
|
||||
"movdqa (%1,%0), %%xmm1 \n"\
|
||||
"movdqa %%xmm0, (%1,%0) \n"\
|
||||
"psubw %%xmm1, %%xmm0 \n"\
|
||||
"movdqa %%xmm0, (%3,%0) \n"\
|
||||
"add $16, %0 \n"\
|
||||
"jl 1b \n"\
|
||||
:"+&r"(x)\
|
||||
:"r"(buf+width),\
|
||||
"r"(buf1+width),\
|
||||
"r"(dc+width),\
|
||||
"r"(src+width*2),\
|
||||
"r"(src+width*2+src_linesize),\
|
||||
"m"(*pw_ff)\
|
||||
:"memory"\
|
||||
);
|
||||
if (((intptr_t) src | src_linesize) & 15) {
|
||||
BLURV("movdqu");
|
||||
} else {
|
||||
BLURV("movdqa");
|
||||
}
|
||||
#endif // HAVE_SSE
|
||||
}
|
Loading…
Reference in New Issue
Block a user