Merge remote-tracking branch 'qatar/master'
* qatar/master:
  SBR DSP: fix SSE code to not use SSE2 instructions.
  cpu: initialize mask to -1, so that by default, optimizations are used.
  error_resilience: initialize s->block_index[].
  svq3: protect against negative quantizers.
  Don't use ff_cropTbl[] for IDCT.
  swscale: make filterPos 32bit.
  FATE: add CPUFLAGS variable, mapping to -cpuflags avconv option.
  avconv: add -cpuflags option for setting supported cpuflags.
  cpu: add av_set_cpu_flags_mask().
  libx264: Allow overriding the sliced threads option
  avconv: fix counting encoded video size.

Conflicts:
  doc/APIchanges
  doc/fate.texi
  doc/ffmpeg.texi
  ffmpeg.c
  libavcodec/h264idct_template.c
  libavcodec/svq3.c
  libavutil/avutil.h
  libavutil/cpu.c
  libavutil/cpu.h
  libswscale/swscale.c
  tests/Makefile
  tests/fate-run.sh
  tests/regression-funcs.sh

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
6df42f9874
60
cmdutils.c
60
cmdutils.c
@ -340,11 +340,8 @@ void parse_options(void *optctx, int argc, char **argv, const OptionDef *options
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
int locate_option(int argc, char **argv, const OptionDef *options,
|
||||||
* Return index of option opt in argv or 0 if not found.
|
const char *optname)
|
||||||
*/
|
|
||||||
static int locate_option(int argc, char **argv, const OptionDef *options,
|
|
||||||
const char *optname)
|
|
||||||
{
|
{
|
||||||
const OptionDef *po;
|
const OptionDef *po;
|
||||||
int i;
|
int i;
|
||||||
@ -537,13 +534,54 @@ int opt_max_alloc(const char *opt, const char *arg)
|
|||||||
|
|
||||||
int opt_cpuflags(const char *opt, const char *arg)
|
int opt_cpuflags(const char *opt, const char *arg)
|
||||||
{
|
{
|
||||||
char *tail;
|
#define CPUFLAG_MMX2 (AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMX2)
|
||||||
long flags = strtol(arg, &tail, 10);
|
#define CPUFLAG_3DNOW (AV_CPU_FLAG_3DNOW | AV_CPU_FLAG_MMX)
|
||||||
|
#define CPUFLAG_3DNOWEXT (AV_CPU_FLAG_3DNOWEXT | CPUFLAG_3DNOW)
|
||||||
|
#define CPUFLAG_SSE (AV_CPU_FLAG_SSE | CPUFLAG_MMX2)
|
||||||
|
#define CPUFLAG_SSE2 (AV_CPU_FLAG_SSE2 | CPUFLAG_SSE)
|
||||||
|
#define CPUFLAG_SSE2SLOW (AV_CPU_FLAG_SSE2SLOW | CPUFLAG_SSE2)
|
||||||
|
#define CPUFLAG_SSE3 (AV_CPU_FLAG_SSE3 | CPUFLAG_SSE2)
|
||||||
|
#define CPUFLAG_SSE3SLOW (AV_CPU_FLAG_SSE3SLOW | CPUFLAG_SSE3)
|
||||||
|
#define CPUFLAG_SSSE3 (AV_CPU_FLAG_SSSE3 | CPUFLAG_SSE3)
|
||||||
|
#define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3)
|
||||||
|
#define CPUFLAG_SSE42 (AV_CPU_FLAG_SSE42 | CPUFLAG_SSE4)
|
||||||
|
#define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42)
|
||||||
|
#define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX)
|
||||||
|
#define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX)
|
||||||
|
static const AVOption cpuflags_opts[] = {
|
||||||
|
{ "flags" , NULL, 0, AV_OPT_TYPE_FLAGS, { 0 }, INT64_MIN, INT64_MAX, .unit = "flags" },
|
||||||
|
{ "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ALTIVEC }, .unit = "flags" },
|
||||||
|
{ "mmx" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_MMX }, .unit = "flags" },
|
||||||
|
{ "mmx2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMX2 }, .unit = "flags" },
|
||||||
|
{ "sse" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE }, .unit = "flags" },
|
||||||
|
{ "sse2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2 }, .unit = "flags" },
|
||||||
|
{ "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2SLOW }, .unit = "flags" },
|
||||||
|
{ "sse3" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE3 }, .unit = "flags" },
|
||||||
|
{ "sse3slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE3SLOW }, .unit = "flags" },
|
||||||
|
{ "ssse3" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSSE3 }, .unit = "flags" },
|
||||||
|
{ "atom" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ATOM }, .unit = "flags" },
|
||||||
|
{ "sse4.1" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE4 }, .unit = "flags" },
|
||||||
|
{ "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE42 }, .unit = "flags" },
|
||||||
|
{ "avx" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_AVX }, .unit = "flags" },
|
||||||
|
{ "xop" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_XOP }, .unit = "flags" },
|
||||||
|
{ "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_FMA4 }, .unit = "flags" },
|
||||||
|
{ "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOW }, .unit = "flags" },
|
||||||
|
{ "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOWEXT }, .unit = "flags" },
|
||||||
|
{ NULL },
|
||||||
|
};
|
||||||
|
static const AVClass class = {
|
||||||
|
.class_name = "cpuflags",
|
||||||
|
.item_name = av_default_item_name,
|
||||||
|
.option = cpuflags_opts,
|
||||||
|
.version = LIBAVUTIL_VERSION_INT,
|
||||||
|
};
|
||||||
|
int flags = av_get_cpu_flags();
|
||||||
|
int ret;
|
||||||
|
const AVClass *pclass = &class;
|
||||||
|
|
||||||
|
if ((ret = av_opt_eval_flags(&pclass, &cpuflags_opts[0], arg, &flags)) < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
if (*tail) {
|
|
||||||
av_log(NULL, AV_LOG_FATAL, "Invalid cpuflags \"%s\".\n", arg);
|
|
||||||
exit_program(1);
|
|
||||||
}
|
|
||||||
av_force_cpu_flags(flags);
|
av_force_cpu_flags(flags);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -206,6 +206,12 @@ int parse_option(void *optctx, const char *opt, const char *arg,
|
|||||||
*/
|
*/
|
||||||
void parse_loglevel(int argc, char **argv, const OptionDef *options);
|
void parse_loglevel(int argc, char **argv, const OptionDef *options);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return index of option opt in argv or 0 if not found.
|
||||||
|
*/
|
||||||
|
int locate_option(int argc, char **argv, const OptionDef *options,
|
||||||
|
const char *optname);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if the given stream matches a stream specifier.
|
* Check if the given stream matches a stream specifier.
|
||||||
*
|
*
|
||||||
|
@ -16,4 +16,4 @@
|
|||||||
{ "debug", HAS_ARG, {(void*)opt_codec_debug}, "set debug flags", "flags" },
|
{ "debug", HAS_ARG, {(void*)opt_codec_debug}, "set debug flags", "flags" },
|
||||||
{ "report", 0, {(void*)opt_report}, "generate a report" },
|
{ "report", 0, {(void*)opt_report}, "generate a report" },
|
||||||
{ "max_alloc", HAS_ARG, {(void*)opt_max_alloc}, "set maximum size of a single allocated block", "bytes" },
|
{ "max_alloc", HAS_ARG, {(void*)opt_max_alloc}, "set maximum size of a single allocated block", "bytes" },
|
||||||
{ "cpuflags", HAS_ARG, {(void*)opt_cpuflags}, "force specific cpu flags", "flags" },
|
{ "cpuflags", HAS_ARG | OPT_EXPERT, {(void*)opt_cpuflags}, "force specific cpu flags", "flags" },
|
||||||
|
@ -134,6 +134,10 @@ It also implies @code{-loglevel verbose}.
|
|||||||
Note: setting the environment variable @code{FFREPORT} to any value has the
|
Note: setting the environment variable @code{FFREPORT} to any value has the
|
||||||
same effect.
|
same effect.
|
||||||
|
|
||||||
|
@item -cpuflags flags (@emph{global})
|
||||||
|
Allows setting and clearing cpu flags. This option is intended
|
||||||
|
for testing. Do not use it unless you know what you're doing.
|
||||||
|
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
@section AVOptions
|
@section AVOptions
|
||||||
|
@ -166,9 +166,11 @@ the synchronisation of the samples directory.
|
|||||||
@item THREADS
|
@item THREADS
|
||||||
Specify how many threads to use while running regression tests, it is
|
Specify how many threads to use while running regression tests, it is
|
||||||
quite useful to detect thread-related regressions.
|
quite useful to detect thread-related regressions.
|
||||||
|
@item CPUFLAGS
|
||||||
|
Specify CPU flags.
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
@example
|
@example
|
||||||
make V=1 SAMPLES=/var/fate/samples THREADS=2 fate
|
make V=1 SAMPLES=/var/fate/samples THREADS=2 CPUFLAGS=mmx fate
|
||||||
@end example
|
@end example
|
9
ffmpeg.c
9
ffmpeg.c
@ -4963,6 +4963,13 @@ static int opt_deinterlace(const char *opt, const char *arg)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void parse_cpuflags(int argc, char **argv, const OptionDef *options)
|
||||||
|
{
|
||||||
|
int idx = locate_option(argc, argv, options, "cpuflags");
|
||||||
|
if (idx && argv[idx + 1])
|
||||||
|
opt_cpuflags("cpuflags", argv[idx + 1]);
|
||||||
|
}
|
||||||
|
|
||||||
#define OFFSET(x) offsetof(OptionsContext, x)
|
#define OFFSET(x) offsetof(OptionsContext, x)
|
||||||
static const OptionDef options[] = {
|
static const OptionDef options[] = {
|
||||||
/* main options */
|
/* main options */
|
||||||
@ -5136,6 +5143,8 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
term_init();
|
term_init();
|
||||||
|
|
||||||
|
parse_cpuflags(argc, argv, options);
|
||||||
|
|
||||||
/* parse options */
|
/* parse options */
|
||||||
parse_options(&o, argc, argv, options, opt_output_file);
|
parse_options(&o, argc, argv, options, opt_output_file);
|
||||||
|
|
||||||
|
@ -367,18 +367,17 @@ void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
|||||||
int line_size)
|
int line_size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
|
|
||||||
/* read the pixels */
|
/* read the pixels */
|
||||||
for(i=0;i<8;i++) {
|
for(i=0;i<8;i++) {
|
||||||
pixels[0] = cm[block[0]];
|
pixels[0] = av_clip_uint8(block[0]);
|
||||||
pixels[1] = cm[block[1]];
|
pixels[1] = av_clip_uint8(block[1]);
|
||||||
pixels[2] = cm[block[2]];
|
pixels[2] = av_clip_uint8(block[2]);
|
||||||
pixels[3] = cm[block[3]];
|
pixels[3] = av_clip_uint8(block[3]);
|
||||||
pixels[4] = cm[block[4]];
|
pixels[4] = av_clip_uint8(block[4]);
|
||||||
pixels[5] = cm[block[5]];
|
pixels[5] = av_clip_uint8(block[5]);
|
||||||
pixels[6] = cm[block[6]];
|
pixels[6] = av_clip_uint8(block[6]);
|
||||||
pixels[7] = cm[block[7]];
|
pixels[7] = av_clip_uint8(block[7]);
|
||||||
|
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += 8;
|
block += 8;
|
||||||
@ -389,14 +388,13 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
|
|||||||
int line_size)
|
int line_size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
|
|
||||||
/* read the pixels */
|
/* read the pixels */
|
||||||
for(i=0;i<4;i++) {
|
for(i=0;i<4;i++) {
|
||||||
pixels[0] = cm[block[0]];
|
pixels[0] = av_clip_uint8(block[0]);
|
||||||
pixels[1] = cm[block[1]];
|
pixels[1] = av_clip_uint8(block[1]);
|
||||||
pixels[2] = cm[block[2]];
|
pixels[2] = av_clip_uint8(block[2]);
|
||||||
pixels[3] = cm[block[3]];
|
pixels[3] = av_clip_uint8(block[3]);
|
||||||
|
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += 8;
|
block += 8;
|
||||||
@ -407,12 +405,11 @@ static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
|
|||||||
int line_size)
|
int line_size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
|
|
||||||
/* read the pixels */
|
/* read the pixels */
|
||||||
for(i=0;i<2;i++) {
|
for(i=0;i<2;i++) {
|
||||||
pixels[0] = cm[block[0]];
|
pixels[0] = av_clip_uint8(block[0]);
|
||||||
pixels[1] = cm[block[1]];
|
pixels[1] = av_clip_uint8(block[1]);
|
||||||
|
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += 8;
|
block += 8;
|
||||||
@ -444,18 +441,17 @@ void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
|||||||
int line_size)
|
int line_size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
|
|
||||||
/* read the pixels */
|
/* read the pixels */
|
||||||
for(i=0;i<8;i++) {
|
for(i=0;i<8;i++) {
|
||||||
pixels[0] = cm[pixels[0] + block[0]];
|
pixels[0] = av_clip_uint8(pixels[0] + block[0]);
|
||||||
pixels[1] = cm[pixels[1] + block[1]];
|
pixels[1] = av_clip_uint8(pixels[1] + block[1]);
|
||||||
pixels[2] = cm[pixels[2] + block[2]];
|
pixels[2] = av_clip_uint8(pixels[2] + block[2]);
|
||||||
pixels[3] = cm[pixels[3] + block[3]];
|
pixels[3] = av_clip_uint8(pixels[3] + block[3]);
|
||||||
pixels[4] = cm[pixels[4] + block[4]];
|
pixels[4] = av_clip_uint8(pixels[4] + block[4]);
|
||||||
pixels[5] = cm[pixels[5] + block[5]];
|
pixels[5] = av_clip_uint8(pixels[5] + block[5]);
|
||||||
pixels[6] = cm[pixels[6] + block[6]];
|
pixels[6] = av_clip_uint8(pixels[6] + block[6]);
|
||||||
pixels[7] = cm[pixels[7] + block[7]];
|
pixels[7] = av_clip_uint8(pixels[7] + block[7]);
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += 8;
|
block += 8;
|
||||||
}
|
}
|
||||||
@ -465,14 +461,13 @@ static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
|
|||||||
int line_size)
|
int line_size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
|
|
||||||
/* read the pixels */
|
/* read the pixels */
|
||||||
for(i=0;i<4;i++) {
|
for(i=0;i<4;i++) {
|
||||||
pixels[0] = cm[pixels[0] + block[0]];
|
pixels[0] = av_clip_uint8(pixels[0] + block[0]);
|
||||||
pixels[1] = cm[pixels[1] + block[1]];
|
pixels[1] = av_clip_uint8(pixels[1] + block[1]);
|
||||||
pixels[2] = cm[pixels[2] + block[2]];
|
pixels[2] = av_clip_uint8(pixels[2] + block[2]);
|
||||||
pixels[3] = cm[pixels[3] + block[3]];
|
pixels[3] = av_clip_uint8(pixels[3] + block[3]);
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += 8;
|
block += 8;
|
||||||
}
|
}
|
||||||
@ -482,12 +477,11 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
|
|||||||
int line_size)
|
int line_size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
|
|
||||||
/* read the pixels */
|
/* read the pixels */
|
||||||
for(i=0;i<2;i++) {
|
for(i=0;i<2;i++) {
|
||||||
pixels[0] = cm[pixels[0] + block[0]];
|
pixels[0] = av_clip_uint8(pixels[0] + block[0]);
|
||||||
pixels[1] = cm[pixels[1] + block[1]];
|
pixels[1] = av_clip_uint8(pixels[1] + block[1]);
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += 8;
|
block += 8;
|
||||||
}
|
}
|
||||||
@ -2779,15 +2773,11 @@ static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
|
|||||||
|
|
||||||
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
|
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
|
||||||
{
|
{
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
dest[0] = av_clip_uint8((block[0] + 4)>>3);
|
||||||
|
|
||||||
dest[0] = cm[(block[0] + 4)>>3];
|
|
||||||
}
|
}
|
||||||
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
|
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
|
||||||
{
|
{
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
|
||||||
|
|
||||||
dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
|
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
|
||||||
|
@ -440,9 +440,14 @@ static void guess_mv(MpegEncContext *s)
|
|||||||
if ((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) ||
|
if ((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) ||
|
||||||
num_avail <= mb_width / 2) {
|
num_avail <= mb_width / 2) {
|
||||||
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
||||||
|
s->mb_x = 0;
|
||||||
|
s->mb_y = mb_y;
|
||||||
|
ff_init_block_index(s);
|
||||||
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
|
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
|
||||||
const int mb_xy = mb_x + mb_y * s->mb_stride;
|
const int mb_xy = mb_x + mb_y * s->mb_stride;
|
||||||
|
|
||||||
|
ff_update_block_index(s);
|
||||||
|
|
||||||
if (IS_INTRA(s->current_picture.f.mb_type[mb_xy]))
|
if (IS_INTRA(s->current_picture.f.mb_type[mb_xy]))
|
||||||
continue;
|
continue;
|
||||||
if (!(s->error_status_table[mb_xy] & ER_MV_ERROR))
|
if (!(s->error_status_table[mb_xy] & ER_MV_ERROR))
|
||||||
@ -477,6 +482,9 @@ static void guess_mv(MpegEncContext *s)
|
|||||||
|
|
||||||
changed = 0;
|
changed = 0;
|
||||||
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
||||||
|
s->mb_x = 0;
|
||||||
|
s->mb_y = mb_y;
|
||||||
|
ff_init_block_index(s);
|
||||||
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
|
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
|
||||||
const int mb_xy = mb_x + mb_y * s->mb_stride;
|
const int mb_xy = mb_x + mb_y * s->mb_stride;
|
||||||
int mv_predictor[8][2] = { { 0 } };
|
int mv_predictor[8][2] = { { 0 } };
|
||||||
@ -488,6 +496,8 @@ static void guess_mv(MpegEncContext *s)
|
|||||||
const int mot_index = (mb_x + mb_y * mot_stride) * mot_step;
|
const int mot_index = (mb_x + mb_y * mot_stride) * mot_step;
|
||||||
int prev_x, prev_y, prev_ref;
|
int prev_x, prev_y, prev_ref;
|
||||||
|
|
||||||
|
ff_update_block_index(s);
|
||||||
|
|
||||||
if ((mb_x ^ mb_y ^ pass) & 1)
|
if ((mb_x ^ mb_y ^ pass) & 1)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@ -1098,11 +1108,16 @@ void ff_er_frame_end(MpegEncContext *s)
|
|||||||
|
|
||||||
/* handle inter blocks with damaged AC */
|
/* handle inter blocks with damaged AC */
|
||||||
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
||||||
|
s->mb_x = 0;
|
||||||
|
s->mb_y = mb_y;
|
||||||
|
ff_init_block_index(s);
|
||||||
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
|
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
|
||||||
const int mb_xy = mb_x + mb_y * s->mb_stride;
|
const int mb_xy = mb_x + mb_y * s->mb_stride;
|
||||||
const int mb_type = s->current_picture.f.mb_type[mb_xy];
|
const int mb_type = s->current_picture.f.mb_type[mb_xy];
|
||||||
int dir = !s->last_picture.f.data[0];
|
int dir = !s->last_picture.f.data[0];
|
||||||
|
|
||||||
|
ff_update_block_index(s);
|
||||||
|
|
||||||
error = s->error_status_table[mb_xy];
|
error = s->error_status_table[mb_xy];
|
||||||
|
|
||||||
if (IS_INTRA(mb_type))
|
if (IS_INTRA(mb_type))
|
||||||
@ -1140,11 +1155,16 @@ void ff_er_frame_end(MpegEncContext *s)
|
|||||||
/* guess MVs */
|
/* guess MVs */
|
||||||
if (s->pict_type == AV_PICTURE_TYPE_B) {
|
if (s->pict_type == AV_PICTURE_TYPE_B) {
|
||||||
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
||||||
|
s->mb_x = 0;
|
||||||
|
s->mb_y = mb_y;
|
||||||
|
ff_init_block_index(s);
|
||||||
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
|
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
|
||||||
int xy = mb_x * 2 + mb_y * 2 * s->b8_stride;
|
int xy = mb_x * 2 + mb_y * 2 * s->b8_stride;
|
||||||
const int mb_xy = mb_x + mb_y * s->mb_stride;
|
const int mb_xy = mb_x + mb_y * s->mb_stride;
|
||||||
const int mb_type = s->current_picture.f.mb_type[mb_xy];
|
const int mb_type = s->current_picture.f.mb_type[mb_xy];
|
||||||
|
|
||||||
|
ff_update_block_index(s);
|
||||||
|
|
||||||
error = s->error_status_table[mb_xy];
|
error = s->error_status_table[mb_xy];
|
||||||
|
|
||||||
if (IS_INTRA(mb_type))
|
if (IS_INTRA(mb_type))
|
||||||
|
@ -49,7 +49,6 @@ static const uint8_t scan8[16*3]={
|
|||||||
void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride)
|
void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
INIT_CLIP
|
|
||||||
pixel *dst = (pixel*)_dst;
|
pixel *dst = (pixel*)_dst;
|
||||||
dctcoef *block = (dctcoef*)_block;
|
dctcoef *block = (dctcoef*)_block;
|
||||||
stride >>= sizeof(pixel)-1;
|
stride >>= sizeof(pixel)-1;
|
||||||
@ -74,16 +73,15 @@ void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride)
|
|||||||
const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i];
|
const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i];
|
||||||
const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1);
|
const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1);
|
||||||
|
|
||||||
dst[i + 0*stride]= CLIP(dst[i + 0*stride] + ((z0 + z3) >> 6));
|
dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((z0 + z3) >> 6));
|
||||||
dst[i + 1*stride]= CLIP(dst[i + 1*stride] + ((z1 + z2) >> 6));
|
dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((z1 + z2) >> 6));
|
||||||
dst[i + 2*stride]= CLIP(dst[i + 2*stride] + ((z1 - z2) >> 6));
|
dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((z1 - z2) >> 6));
|
||||||
dst[i + 3*stride]= CLIP(dst[i + 3*stride] + ((z0 - z3) >> 6));
|
dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((z0 - z3) >> 6));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
|
void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
|
||||||
int i;
|
int i;
|
||||||
INIT_CLIP
|
|
||||||
pixel *dst = (pixel*)_dst;
|
pixel *dst = (pixel*)_dst;
|
||||||
dctcoef *block = (dctcoef*)_block;
|
dctcoef *block = (dctcoef*)_block;
|
||||||
stride >>= sizeof(pixel)-1;
|
stride >>= sizeof(pixel)-1;
|
||||||
@ -143,14 +141,14 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
|
|||||||
const int b5 = (a3>>2) - a5;
|
const int b5 = (a3>>2) - a5;
|
||||||
const int b7 = a7 - (a1>>2);
|
const int b7 = a7 - (a1>>2);
|
||||||
|
|
||||||
dst[i + 0*stride] = CLIP( dst[i + 0*stride] + ((b0 + b7) >> 6) );
|
dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((b0 + b7) >> 6) );
|
||||||
dst[i + 1*stride] = CLIP( dst[i + 1*stride] + ((b2 + b5) >> 6) );
|
dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((b2 + b5) >> 6) );
|
||||||
dst[i + 2*stride] = CLIP( dst[i + 2*stride] + ((b4 + b3) >> 6) );
|
dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((b4 + b3) >> 6) );
|
||||||
dst[i + 3*stride] = CLIP( dst[i + 3*stride] + ((b6 + b1) >> 6) );
|
dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((b6 + b1) >> 6) );
|
||||||
dst[i + 4*stride] = CLIP( dst[i + 4*stride] + ((b6 - b1) >> 6) );
|
dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((b6 - b1) >> 6) );
|
||||||
dst[i + 5*stride] = CLIP( dst[i + 5*stride] + ((b4 - b3) >> 6) );
|
dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((b4 - b3) >> 6) );
|
||||||
dst[i + 6*stride] = CLIP( dst[i + 6*stride] + ((b2 - b5) >> 6) );
|
dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((b2 - b5) >> 6) );
|
||||||
dst[i + 7*stride] = CLIP( dst[i + 7*stride] + ((b0 - b7) >> 6) );
|
dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((b0 - b7) >> 6) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -158,13 +156,12 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
|
|||||||
void FUNCC(ff_h264_idct_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
|
void FUNCC(ff_h264_idct_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
|
||||||
int i, j;
|
int i, j;
|
||||||
int dc = (((dctcoef*)block)[0] + 32) >> 6;
|
int dc = (((dctcoef*)block)[0] + 32) >> 6;
|
||||||
INIT_CLIP
|
|
||||||
pixel *dst = (pixel*)p_dst;
|
pixel *dst = (pixel*)p_dst;
|
||||||
stride >>= sizeof(pixel)-1;
|
stride >>= sizeof(pixel)-1;
|
||||||
for( j = 0; j < 4; j++ )
|
for( j = 0; j < 4; j++ )
|
||||||
{
|
{
|
||||||
for( i = 0; i < 4; i++ )
|
for( i = 0; i < 4; i++ )
|
||||||
dst[i] = CLIP( dst[i] + dc );
|
dst[i] = av_clip_pixel( dst[i] + dc );
|
||||||
dst += stride;
|
dst += stride;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -172,13 +169,12 @@ void FUNCC(ff_h264_idct_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
|
|||||||
void FUNCC(ff_h264_idct8_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
|
void FUNCC(ff_h264_idct8_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
|
||||||
int i, j;
|
int i, j;
|
||||||
int dc = (((dctcoef*)block)[0] + 32) >> 6;
|
int dc = (((dctcoef*)block)[0] + 32) >> 6;
|
||||||
INIT_CLIP
|
|
||||||
pixel *dst = (pixel*)p_dst;
|
pixel *dst = (pixel*)p_dst;
|
||||||
stride >>= sizeof(pixel)-1;
|
stride >>= sizeof(pixel)-1;
|
||||||
for( j = 0; j < 8; j++ )
|
for( j = 0; j < 8; j++ )
|
||||||
{
|
{
|
||||||
for( i = 0; i < 8; i++ )
|
for( i = 0; i < 8; i++ )
|
||||||
dst[i] = CLIP( dst[i] + dc );
|
dst[i] = av_clip_pixel( dst[i] + dc );
|
||||||
dst += stride;
|
dst += stride;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -454,6 +454,8 @@ static av_cold int X264_init(AVCodecContext *avctx)
|
|||||||
x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR;
|
x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR;
|
||||||
|
|
||||||
x4->params.i_threads = avctx->thread_count;
|
x4->params.i_threads = avctx->thread_count;
|
||||||
|
if (avctx->thread_type)
|
||||||
|
x4->params.b_sliced_threads = avctx->thread_type == FF_THREAD_SLICE;
|
||||||
|
|
||||||
x4->params.b_interlaced = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
|
x4->params.b_interlaced = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
|
||||||
|
|
||||||
@ -631,6 +633,7 @@ static const AVCodecDefault x264_defaults[] = {
|
|||||||
{ "coder", "-1" },
|
{ "coder", "-1" },
|
||||||
{ "cmp", "-1" },
|
{ "cmp", "-1" },
|
||||||
{ "threads", AV_STRINGIFY(X264_THREADS_AUTO) },
|
{ "threads", AV_STRINGIFY(X264_THREADS_AUTO) },
|
||||||
|
{ "thread_type", "0" },
|
||||||
{ NULL },
|
{ NULL },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -102,15 +102,13 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){
|
|||||||
|
|
||||||
static void rv34_idct_dc_add_c(uint8_t *dst, ptrdiff_t stride, int dc)
|
static void rv34_idct_dc_add_c(uint8_t *dst, ptrdiff_t stride, int dc)
|
||||||
{
|
{
|
||||||
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
cm += (13*13*dc + 0x200) >> 10;
|
dc = (13*13*dc + 0x200) >> 10;
|
||||||
|
|
||||||
for (i = 0; i < 4; i++)
|
for (i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
for (j = 0; j < 4; j++)
|
for (j = 0; j < 4; j++)
|
||||||
dst[j] = cm[ dst[j] ];
|
dst[j] = av_clip_uint8( dst[j] + dc );
|
||||||
|
|
||||||
dst += stride;
|
dst += stride;
|
||||||
}
|
}
|
||||||
|
@ -132,7 +132,6 @@ void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
|
|||||||
static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
|
static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
|
||||||
{
|
{
|
||||||
int c0, c1, c2, c3, a0, a1, a2, a3;
|
int c0, c1, c2, c3, a0, a1, a2, a3;
|
||||||
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
|
|
||||||
a0 = col[8*0];
|
a0 = col[8*0];
|
||||||
a1 = col[8*1];
|
a1 = col[8*1];
|
||||||
@ -142,13 +141,13 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col
|
|||||||
c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
|
c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
|
||||||
c1 = a1 * C1 + a3 * C2;
|
c1 = a1 * C1 + a3 * C2;
|
||||||
c3 = a1 * C2 - a3 * C1;
|
c3 = a1 * C2 - a3 * C1;
|
||||||
dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];
|
dest[0] = av_clip_uint8(dest[0] + ((c0 + c1) >> C_SHIFT));
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];
|
dest[0] = av_clip_uint8(dest[0] + ((c2 + c3) >> C_SHIFT));
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];
|
dest[0] = av_clip_uint8(dest[0] + ((c2 - c3) >> C_SHIFT));
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
|
dest[0] = av_clip_uint8(dest[0] + ((c0 - c1) >> C_SHIFT));
|
||||||
}
|
}
|
||||||
|
|
||||||
#define RN_SHIFT 15
|
#define RN_SHIFT 15
|
||||||
@ -160,7 +159,6 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col
|
|||||||
static inline void idct4row(DCTELEM *row)
|
static inline void idct4row(DCTELEM *row)
|
||||||
{
|
{
|
||||||
int c0, c1, c2, c3, a0, a1, a2, a3;
|
int c0, c1, c2, c3, a0, a1, a2, a3;
|
||||||
//const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
|
|
||||||
a0 = row[0];
|
a0 = row[0];
|
||||||
a1 = row[1];
|
a1 = row[1];
|
||||||
|
@ -224,50 +224,48 @@ static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
|
|||||||
DCTELEM *col)
|
DCTELEM *col)
|
||||||
{
|
{
|
||||||
int a0, a1, a2, a3, b0, b1, b2, b3;
|
int a0, a1, a2, a3, b0, b1, b2, b3;
|
||||||
INIT_CLIP;
|
|
||||||
|
|
||||||
IDCT_COLS;
|
IDCT_COLS;
|
||||||
|
|
||||||
dest[0] = CLIP((a0 + b0) >> COL_SHIFT);
|
dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT);
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP((a1 + b1) >> COL_SHIFT);
|
dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT);
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP((a2 + b2) >> COL_SHIFT);
|
dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT);
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP((a3 + b3) >> COL_SHIFT);
|
dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT);
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP((a3 - b3) >> COL_SHIFT);
|
dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT);
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP((a2 - b2) >> COL_SHIFT);
|
dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT);
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP((a1 - b1) >> COL_SHIFT);
|
dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT);
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP((a0 - b0) >> COL_SHIFT);
|
dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
|
static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
|
||||||
DCTELEM *col)
|
DCTELEM *col)
|
||||||
{
|
{
|
||||||
int a0, a1, a2, a3, b0, b1, b2, b3;
|
int a0, a1, a2, a3, b0, b1, b2, b3;
|
||||||
INIT_CLIP;
|
|
||||||
|
|
||||||
IDCT_COLS;
|
IDCT_COLS;
|
||||||
|
|
||||||
dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT));
|
dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT));
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP(dest[0] + ((a1 + b1) >> COL_SHIFT));
|
dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT));
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP(dest[0] + ((a2 + b2) >> COL_SHIFT));
|
dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT));
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP(dest[0] + ((a3 + b3) >> COL_SHIFT));
|
dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT));
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP(dest[0] + ((a3 - b3) >> COL_SHIFT));
|
dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT));
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP(dest[0] + ((a2 - b2) >> COL_SHIFT));
|
dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT));
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP(dest[0] + ((a1 - b1) >> COL_SHIFT));
|
dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT));
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
dest[0] = CLIP(dest[0] + ((a0 - b0) >> COL_SHIFT));
|
dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void FUNC(idctSparseCol)(DCTELEM *col)
|
static inline void FUNC(idctSparseCol)(DCTELEM *col)
|
||||||
|
@ -139,8 +139,6 @@ static void vc1_h_s_overlap_c(DCTELEM *left, DCTELEM *right)
|
|||||||
* @see 8.6
|
* @see 8.6
|
||||||
*/
|
*/
|
||||||
static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){
|
static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
|
|
||||||
int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3;
|
int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3;
|
||||||
int a0_sign = a0 >> 31; /* Store sign */
|
int a0_sign = a0 >> 31; /* Store sign */
|
||||||
a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
|
a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
|
||||||
@ -163,8 +161,8 @@ static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){
|
|||||||
else{
|
else{
|
||||||
d = FFMIN(d, clip);
|
d = FFMIN(d, clip);
|
||||||
d = (d ^ d_sign) - d_sign; /* Restore sign */
|
d = (d ^ d_sign) - d_sign; /* Restore sign */
|
||||||
src[-1*stride] = cm[src[-1*stride] - d];
|
src[-1*stride] = av_clip_uint8(src[-1*stride] - d);
|
||||||
src[ 0*stride] = cm[src[ 0*stride] + d];
|
src[ 0*stride] = av_clip_uint8(src[ 0*stride] + d);
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -234,19 +232,17 @@ static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int dc = block[0];
|
int dc = block[0];
|
||||||
const uint8_t *cm;
|
|
||||||
dc = (3 * dc + 1) >> 1;
|
dc = (3 * dc + 1) >> 1;
|
||||||
dc = (3 * dc + 16) >> 5;
|
dc = (3 * dc + 16) >> 5;
|
||||||
cm = ff_cropTbl + MAX_NEG_CROP + dc;
|
|
||||||
for(i = 0; i < 8; i++){
|
for(i = 0; i < 8; i++){
|
||||||
dest[0] = cm[dest[0]];
|
dest[0] = av_clip_uint8(dest[0] + dc);
|
||||||
dest[1] = cm[dest[1]];
|
dest[1] = av_clip_uint8(dest[1] + dc);
|
||||||
dest[2] = cm[dest[2]];
|
dest[2] = av_clip_uint8(dest[2] + dc);
|
||||||
dest[3] = cm[dest[3]];
|
dest[3] = av_clip_uint8(dest[3] + dc);
|
||||||
dest[4] = cm[dest[4]];
|
dest[4] = av_clip_uint8(dest[4] + dc);
|
||||||
dest[5] = cm[dest[5]];
|
dest[5] = av_clip_uint8(dest[5] + dc);
|
||||||
dest[6] = cm[dest[6]];
|
dest[6] = av_clip_uint8(dest[6] + dc);
|
||||||
dest[7] = cm[dest[7]];
|
dest[7] = av_clip_uint8(dest[7] + dc);
|
||||||
dest += linesize;
|
dest += linesize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -326,19 +322,17 @@ static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int dc = block[0];
|
int dc = block[0];
|
||||||
const uint8_t *cm;
|
|
||||||
dc = ( 3 * dc + 1) >> 1;
|
dc = ( 3 * dc + 1) >> 1;
|
||||||
dc = (17 * dc + 64) >> 7;
|
dc = (17 * dc + 64) >> 7;
|
||||||
cm = ff_cropTbl + MAX_NEG_CROP + dc;
|
|
||||||
for(i = 0; i < 4; i++){
|
for(i = 0; i < 4; i++){
|
||||||
dest[0] = cm[dest[0]];
|
dest[0] = av_clip_uint8(dest[0] + dc);
|
||||||
dest[1] = cm[dest[1]];
|
dest[1] = av_clip_uint8(dest[1] + dc);
|
||||||
dest[2] = cm[dest[2]];
|
dest[2] = av_clip_uint8(dest[2] + dc);
|
||||||
dest[3] = cm[dest[3]];
|
dest[3] = av_clip_uint8(dest[3] + dc);
|
||||||
dest[4] = cm[dest[4]];
|
dest[4] = av_clip_uint8(dest[4] + dc);
|
||||||
dest[5] = cm[dest[5]];
|
dest[5] = av_clip_uint8(dest[5] + dc);
|
||||||
dest[6] = cm[dest[6]];
|
dest[6] = av_clip_uint8(dest[6] + dc);
|
||||||
dest[7] = cm[dest[7]];
|
dest[7] = av_clip_uint8(dest[7] + dc);
|
||||||
dest += linesize;
|
dest += linesize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -403,15 +397,13 @@ static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int dc = block[0];
|
int dc = block[0];
|
||||||
const uint8_t *cm;
|
|
||||||
dc = (17 * dc + 4) >> 3;
|
dc = (17 * dc + 4) >> 3;
|
||||||
dc = (12 * dc + 64) >> 7;
|
dc = (12 * dc + 64) >> 7;
|
||||||
cm = ff_cropTbl + MAX_NEG_CROP + dc;
|
|
||||||
for(i = 0; i < 8; i++){
|
for(i = 0; i < 8; i++){
|
||||||
dest[0] = cm[dest[0]];
|
dest[0] = av_clip_uint8(dest[0] + dc);
|
||||||
dest[1] = cm[dest[1]];
|
dest[1] = av_clip_uint8(dest[1] + dc);
|
||||||
dest[2] = cm[dest[2]];
|
dest[2] = av_clip_uint8(dest[2] + dc);
|
||||||
dest[3] = cm[dest[3]];
|
dest[3] = av_clip_uint8(dest[3] + dc);
|
||||||
dest += linesize;
|
dest += linesize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -476,15 +468,13 @@ static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int dc = block[0];
|
int dc = block[0];
|
||||||
const uint8_t *cm;
|
|
||||||
dc = (17 * dc + 4) >> 3;
|
dc = (17 * dc + 4) >> 3;
|
||||||
dc = (17 * dc + 64) >> 7;
|
dc = (17 * dc + 64) >> 7;
|
||||||
cm = ff_cropTbl + MAX_NEG_CROP + dc;
|
|
||||||
for(i = 0; i < 4; i++){
|
for(i = 0; i < 4; i++){
|
||||||
dest[0] = cm[dest[0]];
|
dest[0] = av_clip_uint8(dest[0] + dc);
|
||||||
dest[1] = cm[dest[1]];
|
dest[1] = av_clip_uint8(dest[1] + dc);
|
||||||
dest[2] = cm[dest[2]];
|
dest[2] = av_clip_uint8(dest[2] + dc);
|
||||||
dest[3] = cm[dest[3]];
|
dest[3] = av_clip_uint8(dest[3] + dc);
|
||||||
dest += linesize;
|
dest += linesize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -41,7 +41,6 @@
|
|||||||
static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
|
static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
|
||||||
{
|
{
|
||||||
int16_t *ip = input;
|
int16_t *ip = input;
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
|
|
||||||
int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
|
int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
|
||||||
int Ed, Gd, Add, Bdd, Fd, Hd;
|
int Ed, Gd, Add, Bdd, Fd, Hd;
|
||||||
@ -147,29 +146,29 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
|
|||||||
ip[5*8] = (Fd + Bdd ) >> 4;
|
ip[5*8] = (Fd + Bdd ) >> 4;
|
||||||
ip[6*8] = (Fd - Bdd ) >> 4;
|
ip[6*8] = (Fd - Bdd ) >> 4;
|
||||||
}else if(type==1){
|
}else if(type==1){
|
||||||
dst[0*stride] = cm[(Gd + Cd ) >> 4];
|
dst[0*stride] = av_clip_uint8((Gd + Cd ) >> 4);
|
||||||
dst[7*stride] = cm[(Gd - Cd ) >> 4];
|
dst[7*stride] = av_clip_uint8((Gd - Cd ) >> 4);
|
||||||
|
|
||||||
dst[1*stride] = cm[(Add + Hd ) >> 4];
|
dst[1*stride] = av_clip_uint8((Add + Hd ) >> 4);
|
||||||
dst[2*stride] = cm[(Add - Hd ) >> 4];
|
dst[2*stride] = av_clip_uint8((Add - Hd ) >> 4);
|
||||||
|
|
||||||
dst[3*stride] = cm[(Ed + Dd ) >> 4];
|
dst[3*stride] = av_clip_uint8((Ed + Dd ) >> 4);
|
||||||
dst[4*stride] = cm[(Ed - Dd ) >> 4];
|
dst[4*stride] = av_clip_uint8((Ed - Dd ) >> 4);
|
||||||
|
|
||||||
dst[5*stride] = cm[(Fd + Bdd ) >> 4];
|
dst[5*stride] = av_clip_uint8((Fd + Bdd ) >> 4);
|
||||||
dst[6*stride] = cm[(Fd - Bdd ) >> 4];
|
dst[6*stride] = av_clip_uint8((Fd - Bdd ) >> 4);
|
||||||
}else{
|
}else{
|
||||||
dst[0*stride] = cm[dst[0*stride] + ((Gd + Cd ) >> 4)];
|
dst[0*stride] = av_clip_uint8(dst[0*stride] + ((Gd + Cd ) >> 4));
|
||||||
dst[7*stride] = cm[dst[7*stride] + ((Gd - Cd ) >> 4)];
|
dst[7*stride] = av_clip_uint8(dst[7*stride] + ((Gd - Cd ) >> 4));
|
||||||
|
|
||||||
dst[1*stride] = cm[dst[1*stride] + ((Add + Hd ) >> 4)];
|
dst[1*stride] = av_clip_uint8(dst[1*stride] + ((Add + Hd ) >> 4));
|
||||||
dst[2*stride] = cm[dst[2*stride] + ((Add - Hd ) >> 4)];
|
dst[2*stride] = av_clip_uint8(dst[2*stride] + ((Add - Hd ) >> 4));
|
||||||
|
|
||||||
dst[3*stride] = cm[dst[3*stride] + ((Ed + Dd ) >> 4)];
|
dst[3*stride] = av_clip_uint8(dst[3*stride] + ((Ed + Dd ) >> 4));
|
||||||
dst[4*stride] = cm[dst[4*stride] + ((Ed - Dd ) >> 4)];
|
dst[4*stride] = av_clip_uint8(dst[4*stride] + ((Ed - Dd ) >> 4));
|
||||||
|
|
||||||
dst[5*stride] = cm[dst[5*stride] + ((Fd + Bdd ) >> 4)];
|
dst[5*stride] = av_clip_uint8(dst[5*stride] + ((Fd + Bdd ) >> 4));
|
||||||
dst[6*stride] = cm[dst[6*stride] + ((Fd - Bdd ) >> 4)];
|
dst[6*stride] = av_clip_uint8(dst[6*stride] + ((Fd - Bdd ) >> 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@ -190,18 +189,18 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
|
|||||||
dst[4*stride]=
|
dst[4*stride]=
|
||||||
dst[5*stride]=
|
dst[5*stride]=
|
||||||
dst[6*stride]=
|
dst[6*stride]=
|
||||||
dst[7*stride]= cm[128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20)];
|
dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20));
|
||||||
}else{
|
}else{
|
||||||
if(ip[0*8]){
|
if(ip[0*8]){
|
||||||
int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
|
int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
|
||||||
dst[0*stride] = cm[dst[0*stride] + v];
|
dst[0*stride] = av_clip_uint8(dst[0*stride] + v);
|
||||||
dst[1*stride] = cm[dst[1*stride] + v];
|
dst[1*stride] = av_clip_uint8(dst[1*stride] + v);
|
||||||
dst[2*stride] = cm[dst[2*stride] + v];
|
dst[2*stride] = av_clip_uint8(dst[2*stride] + v);
|
||||||
dst[3*stride] = cm[dst[3*stride] + v];
|
dst[3*stride] = av_clip_uint8(dst[3*stride] + v);
|
||||||
dst[4*stride] = cm[dst[4*stride] + v];
|
dst[4*stride] = av_clip_uint8(dst[4*stride] + v);
|
||||||
dst[5*stride] = cm[dst[5*stride] + v];
|
dst[5*stride] = av_clip_uint8(dst[5*stride] + v);
|
||||||
dst[6*stride] = cm[dst[6*stride] + v];
|
dst[6*stride] = av_clip_uint8(dst[6*stride] + v);
|
||||||
dst[7*stride] = cm[dst[7*stride] + v];
|
dst[7*stride] = av_clip_uint8(dst[7*stride] + v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -225,17 +224,16 @@ void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*
|
|||||||
|
|
||||||
void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){
|
void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){
|
||||||
int i, dc = (block[0] + 15) >> 5;
|
int i, dc = (block[0] + 15) >> 5;
|
||||||
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
|
|
||||||
|
|
||||||
for(i = 0; i < 8; i++){
|
for(i = 0; i < 8; i++){
|
||||||
dest[0] = cm[dest[0]];
|
dest[0] = av_clip_uint8(dest[0] + dc);
|
||||||
dest[1] = cm[dest[1]];
|
dest[1] = av_clip_uint8(dest[1] + dc);
|
||||||
dest[2] = cm[dest[2]];
|
dest[2] = av_clip_uint8(dest[2] + dc);
|
||||||
dest[3] = cm[dest[3]];
|
dest[3] = av_clip_uint8(dest[3] + dc);
|
||||||
dest[4] = cm[dest[4]];
|
dest[4] = av_clip_uint8(dest[4] + dc);
|
||||||
dest[5] = cm[dest[5]];
|
dest[5] = av_clip_uint8(dest[5] + dc);
|
||||||
dest[6] = cm[dest[6]];
|
dest[6] = av_clip_uint8(dest[6] + dc);
|
||||||
dest[7] = cm[dest[7]];
|
dest[7] = av_clip_uint8(dest[7] + dc);
|
||||||
dest += line_size;
|
dest += line_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -80,7 +80,6 @@ static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16])
|
|||||||
static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
|
static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
|
||||||
{
|
{
|
||||||
int i, t0, t1, t2, t3;
|
int i, t0, t1, t2, t3;
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
|
||||||
DCTELEM tmp[16];
|
DCTELEM tmp[16];
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
for (i = 0; i < 4; i++) {
|
||||||
@ -105,10 +104,10 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
|
|||||||
t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]);
|
t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]);
|
||||||
t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]);
|
t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]);
|
||||||
|
|
||||||
dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)];
|
dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
|
||||||
dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)];
|
dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
|
||||||
dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)];
|
dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
|
||||||
dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)];
|
dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
|
||||||
dst += stride;
|
dst += stride;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -116,14 +115,13 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
|
|||||||
static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
|
static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
|
||||||
{
|
{
|
||||||
int i, dc = (block[0] + 4) >> 3;
|
int i, dc = (block[0] + 4) >> 3;
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
|
|
||||||
block[0] = 0;
|
block[0] = 0;
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
for (i = 0; i < 4; i++) {
|
||||||
dst[0] = cm[dst[0]];
|
dst[0] = av_clip_uint8(dst[0] + dc);
|
||||||
dst[1] = cm[dst[1]];
|
dst[1] = av_clip_uint8(dst[1] + dc);
|
||||||
dst[2] = cm[dst[2]];
|
dst[2] = av_clip_uint8(dst[2] + dc);
|
||||||
dst[3] = cm[dst[3]];
|
dst[3] = av_clip_uint8(dst[3] + dc);
|
||||||
dst += stride;
|
dst += stride;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -104,7 +104,7 @@ cglobal sbr_hf_g_filt, 5, 6, 5
|
|||||||
movq m2, [r1]
|
movq m2, [r1]
|
||||||
punpckldq m0, m0
|
punpckldq m0, m0
|
||||||
mulps m2, m0
|
mulps m2, m0
|
||||||
movq [r0], m2
|
movlps [r0], m2
|
||||||
add r0, 8
|
add r0, 8
|
||||||
add r2, 4
|
add r2, 4
|
||||||
add r1, STEP
|
add r1, STEP
|
||||||
|
@ -153,7 +153,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define LIBAVUTIL_VERSION_MAJOR 51
|
#define LIBAVUTIL_VERSION_MAJOR 51
|
||||||
#define LIBAVUTIL_VERSION_MINOR 41
|
#define LIBAVUTIL_VERSION_MINOR 42
|
||||||
#define LIBAVUTIL_VERSION_MICRO 100
|
#define LIBAVUTIL_VERSION_MICRO 100
|
||||||
|
|
||||||
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
|
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
|
||||||
|
@ -23,7 +23,7 @@ static int flags, checked;
|
|||||||
|
|
||||||
void av_force_cpu_flags(int arg){
|
void av_force_cpu_flags(int arg){
|
||||||
flags = arg;
|
flags = arg;
|
||||||
checked = 1;
|
checked = arg != -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int av_get_cpu_flags(void)
|
int av_get_cpu_flags(void)
|
||||||
@ -39,6 +39,13 @@ int av_get_cpu_flags(void)
|
|||||||
return flags;
|
return flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void av_set_cpu_flags_mask(int mask)
|
||||||
|
{
|
||||||
|
checked = 0;
|
||||||
|
flags = av_get_cpu_flags() & mask;
|
||||||
|
checked = 1;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef TEST
|
#ifdef TEST
|
||||||
|
|
||||||
#undef printf
|
#undef printf
|
||||||
|
@ -21,6 +21,8 @@
|
|||||||
#ifndef AVUTIL_CPU_H
|
#ifndef AVUTIL_CPU_H
|
||||||
#define AVUTIL_CPU_H
|
#define AVUTIL_CPU_H
|
||||||
|
|
||||||
|
#include "attributes.h"
|
||||||
|
|
||||||
#define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */
|
#define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */
|
||||||
|
|
||||||
/* lower 16 bits - CPU features */
|
/* lower 16 bits - CPU features */
|
||||||
@ -49,12 +51,19 @@
|
|||||||
*/
|
*/
|
||||||
int av_get_cpu_flags(void);
|
int av_get_cpu_flags(void);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Disables cpu detection and forces the specified flags.
|
* Disables cpu detection and forces the specified flags.
|
||||||
*/
|
*/
|
||||||
void av_force_cpu_flags(int flags);
|
void av_force_cpu_flags(int flags);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set a mask on flags returned by av_get_cpu_flags().
|
||||||
|
* This function is mainly useful for testing.
|
||||||
|
* Please use av_force_cpu_flags() and av_get_cpu_flags() instead, which are more flexible.
|
||||||
|
*
|
||||||
|
* @warning this function is not thread safe.
|
||||||
|
*/
|
||||||
|
attribute_deprecated void av_set_cpu_flags_mask(int mask);
|
||||||
|
|
||||||
/* The following CPU-specific functions shall not be called directly. */
|
/* The following CPU-specific functions shall not be called directly. */
|
||||||
int ff_get_cpu_flags_arm(void);
|
int ff_get_cpu_flags_arm(void);
|
||||||
|
@ -144,7 +144,7 @@ static void yuv2planeX_altivec(const int16_t *filter, int filterSize,
|
|||||||
|
|
||||||
static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
|
static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
|
||||||
const uint8_t *src, const int16_t *filter,
|
const uint8_t *src, const int16_t *filter,
|
||||||
const int16_t *filterPos, int filterSize)
|
const int32_t *filterPos, int filterSize)
|
||||||
{
|
{
|
||||||
register int i;
|
register int i;
|
||||||
DECLARE_ALIGNED(16, int, tempo)[4];
|
DECLARE_ALIGNED(16, int, tempo)[4];
|
||||||
|
@ -63,7 +63,7 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride,
|
|||||||
|
|
||||||
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
|
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
|
||||||
const int16_t *filter,
|
const int16_t *filter,
|
||||||
const int16_t *filterPos, int filterSize)
|
const int32_t *filterPos, int filterSize)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int32_t *dst = (int32_t *) _dst;
|
int32_t *dst = (int32_t *) _dst;
|
||||||
@ -89,7 +89,7 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t
|
|||||||
|
|
||||||
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
|
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
|
||||||
const int16_t *filter,
|
const int16_t *filter,
|
||||||
const int16_t *filterPos, int filterSize)
|
const int32_t *filterPos, int filterSize)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
const uint16_t *src = (const uint16_t *) _src;
|
const uint16_t *src = (const uint16_t *) _src;
|
||||||
@ -113,7 +113,7 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t
|
|||||||
|
|
||||||
// bilinear / bicubic scaling
|
// bilinear / bicubic scaling
|
||||||
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
|
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
|
||||||
const int16_t *filter, const int16_t *filterPos,
|
const int16_t *filter, const int32_t *filterPos,
|
||||||
int filterSize)
|
int filterSize)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@ -131,7 +131,7 @@ static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
|
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
|
||||||
const int16_t *filter, const int16_t *filterPos,
|
const int16_t *filter, const int32_t *filterPos,
|
||||||
int filterSize)
|
int filterSize)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@ -234,7 +234,7 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
|
|||||||
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
|
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
|
||||||
const uint8_t *src_in[4], int srcW, int xInc,
|
const uint8_t *src_in[4], int srcW, int xInc,
|
||||||
const int16_t *hLumFilter,
|
const int16_t *hLumFilter,
|
||||||
const int16_t *hLumFilterPos, int hLumFilterSize,
|
const int32_t *hLumFilterPos, int hLumFilterSize,
|
||||||
uint8_t *formatConvBuffer,
|
uint8_t *formatConvBuffer,
|
||||||
uint32_t *pal, int isAlpha)
|
uint32_t *pal, int isAlpha)
|
||||||
{
|
{
|
||||||
@ -282,7 +282,7 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
|
|||||||
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
|
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
|
||||||
const uint8_t *src_in[4],
|
const uint8_t *src_in[4],
|
||||||
int srcW, int xInc, const int16_t *hChrFilter,
|
int srcW, int xInc, const int16_t *hChrFilter,
|
||||||
const int16_t *hChrFilterPos, int hChrFilterSize,
|
const int32_t *hChrFilterPos, int hChrFilterSize,
|
||||||
uint8_t *formatConvBuffer, uint32_t *pal)
|
uint8_t *formatConvBuffer, uint32_t *pal)
|
||||||
{
|
{
|
||||||
const uint8_t *src1 = src_in[1], *src2 = src_in[2];
|
const uint8_t *src1 = src_in[1], *src2 = src_in[2];
|
||||||
@ -326,10 +326,10 @@ static int swScale(SwsContext *c, const uint8_t* src[],
|
|||||||
const int chrXInc= c->chrXInc;
|
const int chrXInc= c->chrXInc;
|
||||||
const enum PixelFormat dstFormat= c->dstFormat;
|
const enum PixelFormat dstFormat= c->dstFormat;
|
||||||
const int flags= c->flags;
|
const int flags= c->flags;
|
||||||
int16_t *vLumFilterPos= c->vLumFilterPos;
|
int32_t *vLumFilterPos= c->vLumFilterPos;
|
||||||
int16_t *vChrFilterPos= c->vChrFilterPos;
|
int32_t *vChrFilterPos= c->vChrFilterPos;
|
||||||
int16_t *hLumFilterPos= c->hLumFilterPos;
|
int32_t *hLumFilterPos= c->hLumFilterPos;
|
||||||
int16_t *hChrFilterPos= c->hChrFilterPos;
|
int32_t *hChrFilterPos= c->hChrFilterPos;
|
||||||
int16_t *hLumFilter= c->hLumFilter;
|
int16_t *hLumFilter= c->hLumFilter;
|
||||||
int16_t *hChrFilter= c->hChrFilter;
|
int16_t *hChrFilter= c->hChrFilter;
|
||||||
int32_t *lumMmxFilter= c->lumMmxFilter;
|
int32_t *lumMmxFilter= c->lumMmxFilter;
|
||||||
|
@ -299,10 +299,10 @@ typedef struct SwsContext {
|
|||||||
int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes.
|
int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes.
|
||||||
int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes.
|
int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes.
|
||||||
int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes.
|
int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes.
|
||||||
int16_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
|
int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
|
||||||
int16_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
|
int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
|
||||||
int16_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
|
int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
|
||||||
int16_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
|
int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
|
||||||
int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
|
int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
|
||||||
int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
|
int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
|
||||||
int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
|
int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
|
||||||
@ -515,10 +515,10 @@ typedef struct SwsContext {
|
|||||||
/** @{ */
|
/** @{ */
|
||||||
void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
|
void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
|
||||||
const uint8_t *src, const int16_t *filter,
|
const uint8_t *src, const int16_t *filter,
|
||||||
const int16_t *filterPos, int filterSize);
|
const int32_t *filterPos, int filterSize);
|
||||||
void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
|
void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
|
||||||
const uint8_t *src, const int16_t *filter,
|
const uint8_t *src, const int16_t *filter,
|
||||||
const int16_t *filterPos, int filterSize);
|
const int32_t *filterPos, int filterSize);
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
/// Color range conversion function for luma plane if needed.
|
/// Color range conversion function for luma plane if needed.
|
||||||
|
@ -191,7 +191,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist
|
|||||||
dist-1.0);
|
dist-1.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
|
static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSize, int xInc,
|
||||||
int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags,
|
int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags,
|
||||||
SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
|
SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
|
||||||
{
|
{
|
||||||
@ -207,7 +207,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
|
|||||||
emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions)
|
emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions)
|
||||||
|
|
||||||
// NOTE: the +3 is for the MMX(+1)/SSE(+3) scaler which reads over the end
|
// NOTE: the +3 is for the MMX(+1)/SSE(+3) scaler which reads over the end
|
||||||
FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(int16_t), fail);
|
FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(**filterPos), fail);
|
||||||
|
|
||||||
if (FFABS(xInc - 0x10000) <10) { // unscaled
|
if (FFABS(xInc - 0x10000) <10) { // unscaled
|
||||||
int i;
|
int i;
|
||||||
|
@ -38,7 +38,7 @@ SECTION .text
|
|||||||
; (SwsContext *c, int{16,32}_t *dst,
|
; (SwsContext *c, int{16,32}_t *dst,
|
||||||
; int dstW, const uint{8,16}_t *src,
|
; int dstW, const uint{8,16}_t *src,
|
||||||
; const int16_t *filter,
|
; const int16_t *filter,
|
||||||
; const int16_t *filterPos, int filterSize);
|
; const int32_t *filterPos, int filterSize);
|
||||||
;
|
;
|
||||||
; Scale one horizontal line. Input is either 8-bits width or 16-bits width
|
; Scale one horizontal line. Input is either 8-bits width or 16-bits width
|
||||||
; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to
|
; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to
|
||||||
@ -53,6 +53,9 @@ SECTION .text
|
|||||||
cglobal hscale%1to%2_%4_%5, %6, 7, %7
|
cglobal hscale%1to%2_%4_%5, %6, 7, %7
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
movsxd r2, r2d
|
movsxd r2, r2d
|
||||||
|
%define mov32 movsxd
|
||||||
|
%else ; x86-32
|
||||||
|
%define mov32 mov
|
||||||
%endif ; x86-64
|
%endif ; x86-64
|
||||||
%if %2 == 19
|
%if %2 == 19
|
||||||
%if mmsize == 8 ; mmx
|
%if mmsize == 8 ; mmx
|
||||||
@ -95,14 +98,14 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
|
|||||||
%else ; %2 == 19
|
%else ; %2 == 19
|
||||||
lea r1, [r1+r2*(4>>r2shr)]
|
lea r1, [r1+r2*(4>>r2shr)]
|
||||||
%endif ; %2 == 15/19
|
%endif ; %2 == 15/19
|
||||||
lea r5, [r5+r2*(2>>r2shr)]
|
lea r5, [r5+r2*(4>>r2shr)]
|
||||||
neg r2
|
neg r2
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
%if %3 == 4 ; filterSize == 4 scaling
|
%if %3 == 4 ; filterSize == 4 scaling
|
||||||
; load 2x4 or 4x4 source pixels into m0/m1
|
; load 2x4 or 4x4 source pixels into m0/m1
|
||||||
movsx r0, word [r5+r2*2+0] ; filterPos[0]
|
mov32 r0, dword [r5+r2*4+0] ; filterPos[0]
|
||||||
movsx r6, word [r5+r2*2+2] ; filterPos[1]
|
mov32 r6, dword [r5+r2*4+4] ; filterPos[1]
|
||||||
movlh m0, [r3+r0*srcmul] ; src[filterPos[0] + {0,1,2,3}]
|
movlh m0, [r3+r0*srcmul] ; src[filterPos[0] + {0,1,2,3}]
|
||||||
%if mmsize == 8
|
%if mmsize == 8
|
||||||
movlh m1, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}]
|
movlh m1, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}]
|
||||||
@ -112,8 +115,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
|
|||||||
%else ; %1 == 8
|
%else ; %1 == 8
|
||||||
movd m4, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}]
|
movd m4, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}]
|
||||||
%endif
|
%endif
|
||||||
movsx r0, word [r5+r2*2+4] ; filterPos[2]
|
mov32 r0, dword [r5+r2*4+8] ; filterPos[2]
|
||||||
movsx r6, word [r5+r2*2+6] ; filterPos[3]
|
mov32 r6, dword [r5+r2*4+12] ; filterPos[3]
|
||||||
movlh m1, [r3+r0*srcmul] ; src[filterPos[2] + {0,1,2,3}]
|
movlh m1, [r3+r0*srcmul] ; src[filterPos[2] + {0,1,2,3}]
|
||||||
%if %1 > 8
|
%if %1 > 8
|
||||||
movhps m1, [r3+r6*srcmul] ; src[filterPos[3] + {0,1,2,3}]
|
movhps m1, [r3+r6*srcmul] ; src[filterPos[3] + {0,1,2,3}]
|
||||||
@ -156,8 +159,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
|
|||||||
%endif ; mmx/sse2/ssse3/sse4
|
%endif ; mmx/sse2/ssse3/sse4
|
||||||
%else ; %3 == 8, i.e. filterSize == 8 scaling
|
%else ; %3 == 8, i.e. filterSize == 8 scaling
|
||||||
; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5
|
; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5
|
||||||
movsx r0, word [r5+r2*1+0] ; filterPos[0]
|
mov32 r0, dword [r5+r2*2+0] ; filterPos[0]
|
||||||
movsx r6, word [r5+r2*1+2] ; filterPos[1]
|
mov32 r6, dword [r5+r2*2+4] ; filterPos[1]
|
||||||
movbh m0, [r3+ r0 *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}]
|
movbh m0, [r3+ r0 *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}]
|
||||||
%if mmsize == 8
|
%if mmsize == 8
|
||||||
movbh m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}]
|
movbh m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}]
|
||||||
@ -165,8 +168,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
|
|||||||
movbh m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}]
|
movbh m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}]
|
||||||
%else ; mmsize == 16
|
%else ; mmsize == 16
|
||||||
movbh m1, [r3+ r6 *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}]
|
movbh m1, [r3+ r6 *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}]
|
||||||
movsx r0, word [r5+r2*1+4] ; filterPos[2]
|
mov32 r0, dword [r5+r2*2+8] ; filterPos[2]
|
||||||
movsx r6, word [r5+r2*1+6] ; filterPos[3]
|
mov32 r6, dword [r5+r2*2+12] ; filterPos[3]
|
||||||
movbh m4, [r3+ r0 *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}]
|
movbh m4, [r3+ r0 *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}]
|
||||||
movbh m5, [r3+ r6 *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}]
|
movbh m5, [r3+ r6 *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}]
|
||||||
%endif ; mmsize == 8/16
|
%endif ; mmsize == 8/16
|
||||||
@ -251,7 +254,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
|
|||||||
%define r1x r1
|
%define r1x r1
|
||||||
%define filter2 r6m
|
%define filter2 r6m
|
||||||
%endif ; x86-32/64
|
%endif ; x86-32/64
|
||||||
lea r5, [r5+r2*2]
|
lea r5, [r5+r2*4]
|
||||||
%if %2 == 15
|
%if %2 == 15
|
||||||
lea r1, [r1+r2*2]
|
lea r1, [r1+r2*2]
|
||||||
%else ; %2 == 19
|
%else ; %2 == 19
|
||||||
@ -261,8 +264,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
|
|||||||
neg r2
|
neg r2
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
movsx r0, word [r5+r2*2+0] ; filterPos[0]
|
mov32 r0, dword [r5+r2*4+0] ; filterPos[0]
|
||||||
movsx r1x, word [r5+r2*2+2] ; filterPos[1]
|
mov32 r1x, dword [r5+r2*4+4] ; filterPos[1]
|
||||||
; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)?
|
; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)?
|
||||||
pxor m4, m4
|
pxor m4, m4
|
||||||
pxor m5, m5
|
pxor m5, m5
|
||||||
@ -293,7 +296,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
|
|||||||
jl .innerloop
|
jl .innerloop
|
||||||
|
|
||||||
%ifidn %4, X4
|
%ifidn %4, X4
|
||||||
movsx r1x, word [r5+r2*2+2] ; filterPos[1]
|
mov32 r1x, dword [r5+r2*4+4] ; filterPos[1]
|
||||||
movlh m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0]
|
movlh m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0]
|
||||||
sub r1x, r6 ; and first 4 srcpx of dstpx[1]
|
sub r1x, r6 ; and first 4 srcpx of dstpx[1]
|
||||||
%if %1 > 8
|
%if %1 > 8
|
||||||
|
@ -94,8 +94,8 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
|
|||||||
int16_t **alpPixBuf= c->alpPixBuf;
|
int16_t **alpPixBuf= c->alpPixBuf;
|
||||||
const int vLumBufSize= c->vLumBufSize;
|
const int vLumBufSize= c->vLumBufSize;
|
||||||
const int vChrBufSize= c->vChrBufSize;
|
const int vChrBufSize= c->vChrBufSize;
|
||||||
int16_t *vLumFilterPos= c->vLumFilterPos;
|
int32_t *vLumFilterPos= c->vLumFilterPos;
|
||||||
int16_t *vChrFilterPos= c->vChrFilterPos;
|
int32_t *vChrFilterPos= c->vChrFilterPos;
|
||||||
int16_t *vLumFilter= c->vLumFilter;
|
int16_t *vLumFilter= c->vLumFilter;
|
||||||
int16_t *vChrFilter= c->vChrFilter;
|
int16_t *vChrFilter= c->vChrFilter;
|
||||||
int32_t *lumMmxFilter= c->lumMmxFilter;
|
int32_t *lumMmxFilter= c->lumMmxFilter;
|
||||||
@ -266,7 +266,7 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt(
|
|||||||
SwsContext *c, int16_t *data, \
|
SwsContext *c, int16_t *data, \
|
||||||
int dstW, const uint8_t *src, \
|
int dstW, const uint8_t *src, \
|
||||||
const int16_t *filter, \
|
const int16_t *filter, \
|
||||||
const int16_t *filterPos, int filterSize)
|
const int32_t *filterPos, int filterSize)
|
||||||
|
|
||||||
#define SCALE_FUNCS(filter_n, opt) \
|
#define SCALE_FUNCS(filter_n, opt) \
|
||||||
SCALE_FUNC(filter_n, 8, 15, opt); \
|
SCALE_FUNC(filter_n, 8, 15, opt); \
|
||||||
|
@ -1450,7 +1450,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
|
|||||||
int dstWidth, const uint8_t *src,
|
int dstWidth, const uint8_t *src,
|
||||||
int srcW, int xInc)
|
int srcW, int xInc)
|
||||||
{
|
{
|
||||||
int16_t *filterPos = c->hLumFilterPos;
|
int32_t *filterPos = c->hLumFilterPos;
|
||||||
int16_t *filter = c->hLumFilter;
|
int16_t *filter = c->hLumFilter;
|
||||||
void *mmx2FilterCode= c->lumMmx2FilterCode;
|
void *mmx2FilterCode= c->lumMmx2FilterCode;
|
||||||
int i;
|
int i;
|
||||||
@ -1546,7 +1546,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
|
|||||||
int dstWidth, const uint8_t *src1,
|
int dstWidth, const uint8_t *src1,
|
||||||
const uint8_t *src2, int srcW, int xInc)
|
const uint8_t *src2, int srcW, int xInc)
|
||||||
{
|
{
|
||||||
int16_t *filterPos = c->hChrFilterPos;
|
int32_t *filterPos = c->hChrFilterPos;
|
||||||
int16_t *filter = c->hChrFilter;
|
int16_t *filter = c->hChrFilter;
|
||||||
void *mmx2FilterCode= c->chrMmx2FilterCode;
|
void *mmx2FilterCode= c->chrMmx2FilterCode;
|
||||||
int i;
|
int i;
|
||||||
|
@ -142,7 +142,7 @@ fate:: $(FATE)
|
|||||||
|
|
||||||
$(FATE): $(TOOL)$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
|
$(FATE): $(TOOL)$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
|
||||||
@echo "TEST $(@:fate-%=%)"
|
@echo "TEST $(@:fate-%=%)"
|
||||||
$(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(TOOL)'
|
$(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(CPUFLAGS)'
|
||||||
|
|
||||||
fate-list:
|
fate-list:
|
||||||
@printf '%s\n' $(sort $(FATE))
|
@printf '%s\n' $(sort $(FATE))
|
||||||
|
@ -17,7 +17,7 @@ ref=${7:-"${base}/ref/fate/${test}"}
|
|||||||
fuzz=$8
|
fuzz=$8
|
||||||
threads=${9:-1}
|
threads=${9:-1}
|
||||||
thread_type=${10:-frame+slice}
|
thread_type=${10:-frame+slice}
|
||||||
tool=${11}
|
cpuflags=${11:-all}
|
||||||
|
|
||||||
outdir="tests/data/fate"
|
outdir="tests/data/fate"
|
||||||
outfile="${outdir}/${test}"
|
outfile="${outdir}/${test}"
|
||||||
@ -51,7 +51,7 @@ run(){
|
|||||||
}
|
}
|
||||||
|
|
||||||
avconv(){
|
avconv(){
|
||||||
run $tool -nostats -threads $threads -thread_type $thread_type "$@"
|
run ffmpeg -nostats -threads $threads -thread_type $thread_type -cpuflags $cpuflags "$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
framecrc(){
|
framecrc(){
|
||||||
@ -77,7 +77,7 @@ pcm(){
|
|||||||
regtest(){
|
regtest(){
|
||||||
t="${test#$2-}"
|
t="${test#$2-}"
|
||||||
ref=${base}/ref/$2/$t
|
ref=${base}/ref/$2/$t
|
||||||
${base}/${1}-regression.sh $t $2 $3 "$target_exec" "$target_path" "$threads" "$thread_type" "$tool" "$samples"
|
${base}/${1}-regression.sh $t $2 $3 "$target_exec" "$target_path" "$threads" "$thread_type" "$cpuflags" "$samples"
|
||||||
}
|
}
|
||||||
|
|
||||||
codectest(){
|
codectest(){
|
||||||
|
@ -10,7 +10,7 @@ raw_src_dir=$3
|
|||||||
target_exec=$4
|
target_exec=$4
|
||||||
target_path=$5
|
target_path=$5
|
||||||
threads=${6:-1}
|
threads=${6:-1}
|
||||||
tool=$8
|
cpuflags=${8:-all}
|
||||||
samples=$9
|
samples=$9
|
||||||
|
|
||||||
datadir="./tests/data"
|
datadir="./tests/data"
|
||||||
@ -20,7 +20,7 @@ this="$test.$test_ref"
|
|||||||
outfile="$datadir/$test_ref/"
|
outfile="$datadir/$test_ref/"
|
||||||
|
|
||||||
# various files
|
# various files
|
||||||
avconv="$target_exec ${target_path}/${tool}"
|
avconv="$target_exec ${target_path}/ffmpeg"
|
||||||
tiny_psnr="tests/tiny_psnr"
|
tiny_psnr="tests/tiny_psnr"
|
||||||
raw_src="${target_path}/$raw_src_dir/%02d.pgm"
|
raw_src="${target_path}/$raw_src_dir/%02d.pgm"
|
||||||
raw_dst="$datadir/$this.out.yuv"
|
raw_dst="$datadir/$this.out.yuv"
|
||||||
@ -45,7 +45,7 @@ echov(){
|
|||||||
|
|
||||||
. $(dirname $0)/md5.sh
|
. $(dirname $0)/md5.sh
|
||||||
|
|
||||||
AVCONV_OPTS="-nostats -y"
|
AVCONV_OPTS="-nostats -y -cpuflags $cpuflags"
|
||||||
COMMON_OPTS="-flags +bitexact -idct simple -sws_flags +accurate_rnd+bitexact"
|
COMMON_OPTS="-flags +bitexact -idct simple -sws_flags +accurate_rnd+bitexact"
|
||||||
DEC_OPTS="$COMMON_OPTS -threads $threads"
|
DEC_OPTS="$COMMON_OPTS -threads $threads"
|
||||||
ENC_OPTS="$COMMON_OPTS -threads 1 -dct fastint"
|
ENC_OPTS="$COMMON_OPTS -threads 1 -dct fastint"
|
||||||
|
Loading…
Reference in New Issue
Block a user