Compare commits

..

2 Commits

Author SHA1 Message Date
John Koleszar
755e2a290b fix integer promotion bug in partition size check
The check '(user_data_end - partition < partition_size)' must be
evaluated as a signed comparison, but because partition_size was
unsigned, the LHS was promoted to unsigned, causing an incorrect
result on 32-bit. Instead, check the upper and lower bounds of
the segment separately.

Change-Id: Ia01708be8492e64abb16b8157e816bd59e2472cf
2010-11-08 16:56:11 -05:00
Yunqing Wang
30ba8f2ae3 Save XMM registers in asm functions
XMM6/7 are used in these functions, and need to be saved.

Change-Id: I7270ef95b479acf29698d34c8d14bf5600a02d64
2010-11-08 16:55:44 -05:00
72 changed files with 2842 additions and 4298 deletions

45
args.c
View File

@@ -135,17 +135,6 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs)
def->long_name, long_val);
fprintf(fp, " %-37s\t%s\n", option_text, def->desc);
if(def->enums)
{
const struct arg_enum_list *listptr;
fprintf(fp, " %-37s\t ", "");
for(listptr = def->enums; listptr->name; listptr++)
fprintf(fp, "%s%s", listptr->name,
listptr[1].name ? ", " : "\n");
}
}
}
@@ -229,37 +218,3 @@ struct vpx_rational arg_parse_rational(const struct arg *arg)
return rat;
}
/* Translate the text of an enumerated option into its integer value.
 *
 * The text is accepted either as a decimal number that equals the value of
 * some entry in the option's enum table, or as a string equal to the name
 * of some entry.  Anything else is reported through die().
 *
 * arg: parsed argument; arg->def->enums must point to a table terminated
 *      by an entry whose name is NULL.
 *
 * Returns the integer value associated with the option text.
 */
int arg_parse_enum(const struct arg *arg)
{
    const struct arg_enum_list *entry;
    char                       *tail;
    long int                    number;

    /* Attempt the numeric interpretation first. */
    number = strtol(arg->val, &tail, 10);

    if (arg->val[0] != '\0' && tail[0] == '\0')
    {
        /* The whole string was a number; it only counts if the table
         * actually contains that value.
         */
        entry = arg->def->enums;

        while (entry->name)
        {
            if (entry->val == number)
                return number;

            entry++;
        }
    }

    /* Not an accepted number, so look the text up by name. */
    entry = arg->def->enums;

    while (entry->name)
    {
        if (strcmp(arg->val, entry->name) == 0)
            return entry->val;

        entry++;
    }

    die("Option %s: Invalid value '%s'\n", arg->name, arg->val);
    return 0;
}
/* Parse an option value, going through the option's enum table when it has
 * one and falling back to plain integer parsing when it does not.
 */
int arg_parse_enum_or_int(const struct arg *arg)
{
    return arg->def->enums ? arg_parse_enum(arg) : arg_parse_int(arg);
}

12
args.h
View File

@@ -22,23 +22,14 @@ struct arg
const struct arg_def *def;
};
/* One entry of a table that maps option keywords to integer values.
 * arg_parse_enum() and arg_show_usage() walk such a table until they reach
 * an entry whose name is NULL, so every table must finish with such an
 * entry (ARG_ENUM_LIST_END provides one).
 */
struct arg_enum_list
{
const char *name; /* keyword compared against the option's value text */
int val; /* value returned when the keyword, or this same number, is given */
};
#define ARG_ENUM_LIST_END {0}
/* Static description of one command-line option. */
typedef struct arg_def
{
const char *short_name;
const char *long_name; /* printed by arg_show_usage() */
int has_val; /* NOTE(review): presumably non-zero when the option takes a value -- confirm against arg_match() */
const char *desc; /* help text printed by arg_show_usage() */
const struct arg_enum_list *enums; /* table of accepted keywords, or NULL when the option is not enumerated */
} arg_def_t;
#define ARG_DEF(s,l,v,d) {s,l,v,d, NULL}
#define ARG_DEF_ENUM(s,l,v,d,e) {s,l,v,d,e}
#define ARG_DEF(s,l,v,d) {s,l,v,d}
#define ARG_DEF_LIST_END {0}
struct arg arg_init(char **argv);
@@ -50,5 +41,4 @@ char **argv_dup(int argc, const char **argv);
unsigned int arg_parse_uint(const struct arg *arg);
int arg_parse_int(const struct arg *arg);
struct vpx_rational arg_parse_rational(const struct arg *arg);
int arg_parse_enum_or_int(const struct arg *arg);
#endif

View File

@@ -547,10 +547,6 @@ process_common_toolchain() {
tgt_isa=universal
tgt_os=darwin9
;;
*darwin10*)
tgt_isa=x86_64
tgt_os=darwin10
;;
*mingw32*|*cygwin*)
[ -z "$tgt_isa" ] && tgt_isa=x86
tgt_os=win32
@@ -610,12 +606,6 @@ process_common_toolchain() {
add_ldflags "-isysroot /Developer/SDKs/MacOSX10.5.sdk"
add_ldflags "-mmacosx-version-min=10.5"
;;
*-darwin10-*)
add_cflags "-isysroot /Developer/SDKs/MacOSX10.6.sdk"
add_cflags "-mmacosx-version-min=10.6"
add_ldflags "-isysroot /Developer/SDKs/MacOSX10.6.sdk"
add_ldflags "-mmacosx-version-min=10.6"
;;
esac
# Handle Solaris variants. Solaris 10 needs -lposix4
@@ -834,7 +824,6 @@ process_common_toolchain() {
soft_enable sse2
soft_enable sse3
soft_enable ssse3
soft_enable sse4_1
case ${tgt_os} in
win*)
@@ -890,7 +879,7 @@ process_common_toolchain() {
case ${tgt_os} in
win*)
add_asflags -f win${bits}
enabled debug && add_asflags -g cv8
enabled debug && add_asflags -g dwarf2
;;
linux*|solaris*)
add_asflags -f elf${bits}

11
configure vendored
View File

@@ -41,7 +41,6 @@ Advanced options:
${toggle_shared} shared library support
${toggle_small} favor smaller size over speed
${toggle_arm_asm_detok} assembly version of the detokenizer (ARM platforms only)
${toggle_postproc_visualizer} macro block / block level visualizers
Codecs:
Codecs can be selectively enabled or disabled individually, or by family:
@@ -115,7 +114,6 @@ all_platforms="${all_platforms} x86-win32-vs7"
all_platforms="${all_platforms} x86-win32-vs8"
all_platforms="${all_platforms} x86-win32-vs9"
all_platforms="${all_platforms} x86_64-darwin9-gcc"
all_platforms="${all_platforms} x86_64-darwin10-gcc"
all_platforms="${all_platforms} x86_64-linux-gcc"
all_platforms="${all_platforms} x86_64-linux-icc"
all_platforms="${all_platforms} x86_64-solaris-gcc"
@@ -201,7 +199,6 @@ ARCH_EXT_LIST="
sse2
sse3
ssse3
sse4_1
altivec
"
@@ -252,7 +249,6 @@ CONFIG_LIST="
shared
small
arm_asm_detok
postproc_visualizer
"
CMDLINE_SELECT="
extra_warnings
@@ -292,7 +288,6 @@ CMDLINE_SELECT="
shared
small
arm_asm_detok
postproc_visualizer
"
process_cmdline() {
@@ -329,6 +324,8 @@ post_process_cmdline() {
for c in ${CODECS}; do
enabled ${c} && enable ${c##*_}s
done
}
@@ -538,10 +535,6 @@ process_toolchain() {
# Other toolchain specific defaults
case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac
if enabled postproc_visualizer; then
enabled postproc || die "postproc_visualizer requires postproc to be enabled"
fi
}

View File

@@ -17,7 +17,6 @@ vpxdec.SRCS += md5_utils.c md5_utils.h
vpxdec.SRCS += vpx_ports/vpx_timer.h
vpxdec.SRCS += vpx/vpx_integer.h
vpxdec.SRCS += args.c args.h vpx_ports/config.h
vpxdec.SRCS += tools_common.c tools_common.h
vpxdec.SRCS += nestegg/halloc/halloc.h
vpxdec.SRCS += nestegg/halloc/src/align.h
vpxdec.SRCS += nestegg/halloc/src/halloc.c
@@ -29,13 +28,11 @@ vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950
vpxdec.DESCRIPTION = Full featured decoder
UTILS-$(CONFIG_ENCODERS) += vpxenc.c
vpxenc.SRCS += args.c args.h y4minput.c y4minput.h
vpxenc.SRCS += tools_common.c tools_common.h
vpxenc.SRCS += vpx_ports/config.h vpx_ports/mem_ops.h
vpxenc.SRCS += vpx_ports/mem_ops_aligned.h
vpxenc.SRCS += libmkv/EbmlIDs.h
vpxenc.SRCS += libmkv/EbmlWriter.c
vpxenc.SRCS += libmkv/EbmlWriter.h
vpxenc.SRCS += experimental.c
vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
vpxenc.DESCRIPTION = Full featured encoder

View File

@@ -78,8 +78,8 @@ if(frame_cnt + 1 == 22) {
} else if(frame_cnt + 1 == 44) {
vpx_active_map_t active;
active.rows = cfg.g_h/16;
active.cols = cfg.g_w/16;
active.rows = 240/16;
active.cols = 320/16;
/* pass in null map to disable active_map*/
active.active_map = NULL;

View File

@@ -1,29 +0,0 @@
#define EXPERIMENTAL_C
#include <stdio.h>
#include "args.h"
/* Get argument definitions */
#include "experimental.h"
/* Build argument definition list */
static const arg_def_t *xxx_def_list[] = {
#include "experimental.h"
NULL
};
/* Print the usage text for every experimental option in xxx_def_list to fp. */
void xxx_show_usage(FILE *fp)
{
arg_show_usage(fp, xxx_def_list);
}
/* Try to consume the argument at argi as one of the experimental options.
 *
 * Returns 1 when an experimental option matched (its xxx_<sym> variable has
 * then been assigned), 0 when none matched.
 */
int xxx_parse_arg(char **argi)
{
struct arg arg;
arg = arg_init(argi);
/* Dummy head of the chain: on this inclusion of experimental.h every
 * XXX_DEFINE_INT/XXX_DEFINE_UINT expands to an "else if (arg_match(...))"
 * branch, so the chain needs a leading "if" to attach to.
 */
if(0);
#include "experimental.h"
/* No experimental option matched. */
else return 0;
return 1;
}

View File

@@ -1,56 +0,0 @@
#if defined(EXPERIMENTAL_C)
/* The experimental.c file includes this file multiple times to build up the
* required state.
*/
#if !defined(XXX_ARG_DEF)
#define XXX_ARG_DEF(sym, value) \
static const arg_def_t xxx_arg_def_##sym = \
ARG_DEF(NULL, #sym, 1, "Experimental");
#define XXX_DEFINE_INT(sym, value) \
XXX_ARG_DEF(sym, value); int xxx_##sym = value;
#define XXX_DEFINE_UINT(sym, value) \
XXX_ARG_DEF(sym, value); unsigned int xxx_##sym = value;
#elif !defined(XXX_ARG_DEF_LIST)
#define XXX_ARG_DEF_LIST(sym) &xxx_arg_def_##sym,
#undef XXX_DEFINE_INT
#define XXX_DEFINE_INT(sym, value) XXX_ARG_DEF_LIST(sym)
#undef XXX_DEFINE_UINT
#define XXX_DEFINE_UINT(sym, value) XXX_ARG_DEF_LIST(sym)
#elif !defined(XXX_ARG_MATCH)
#define XXX_ARG_MATCH
#undef XXX_DEFINE_INT
#define XXX_DEFINE_INT(sym, value)\
else if (arg_match(&arg, &xxx_arg_def_##sym, argi)) \
xxx_##sym = arg_parse_int(&arg);
#undef XXX_DEFINE_UINT
#define XXX_DEFINE_UINT(sym, value)\
else if (arg_match(&arg, &xxx_arg_def_##sym, argi)) \
xxx_##sym = arg_parse_uint(&arg);
#endif
#else
/* All other files just get the extern references to these symbols. */
#define XXX_DEFINE_INT(sym, value) extern int xxx_##sym;
#define XXX_DEFINE_UINT(sym, value) extern unsigned int xxx_##sym;
#include <stdio.h>
void xxx_show_usage(FILE *fp);
int xxx_parse_arg(char **argi);
#endif
/*
* BEGIN EXPERIMENTS BELOW
*
* XXX_DEFINE_INT(knob, 0)
*/
XXX_DEFINE_INT(foo, 0)
XXX_DEFINE_INT(bar, 0)

View File

@@ -1,24 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdio.h>
#include "tools_common.h"
#ifdef _WIN32
#include <io.h>
#include <fcntl.h>
#endif
/* Put a stdio stream into binary mode and hand the same stream back.
 *
 * Only Windows builds do any work here; on every other platform the stream
 * is returned untouched.
 */
FILE* set_binary_mode(FILE *stream)
{
#ifdef _WIN32
    _setmode(_fileno(stream), _O_BINARY);
#else
    /* Nothing to do; keep the parameter "used" to avoid warnings. */
    (void)stream;
#endif

    return stream;
}

View File

@@ -1,16 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef TOOLS_COMMON_H
#define TOOLS_COMMON_H
/* Sets a stdio stream into binary mode */
FILE* set_binary_mode(FILE *stream);
#endif

View File

@@ -36,14 +36,6 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
7, 11, 14, 15,
};
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
{
1, 2, 6, 7,
3, 5, 8, 13,
4, 9, 12, 14,
10, 11, 15, 16
};
DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]);
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
@@ -114,20 +106,23 @@ static void init_bit_trees()
init_bit_tree(cat6, 11);
}
static vp8bc_index_t bcc1[1], bcc2[2], bcc3[3], bcc4[4], bcc5[5], bcc6[11];
vp8_extra_bit_struct vp8_extra_bits[12] =
{
{ 0, 0, 0, 0},
{ 0, 0, 0, 1},
{ 0, 0, 0, 2},
{ 0, 0, 0, 3},
{ 0, 0, 0, 4},
{ cat1, Pcat1, 1, 5},
{ cat2, Pcat2, 2, 7},
{ cat3, Pcat3, 3, 11},
{ cat4, Pcat4, 4, 19},
{ cat5, Pcat5, 5, 35},
{ cat6, Pcat6, 11, 67},
{ 0, 0, 0, 0}
{ 0, 0, 0, 0, 0},
{ 0, 0, 0, 0, 1},
{ 0, 0, 0, 0, 2},
{ 0, 0, 0, 0, 3},
{ 0, 0, 0, 0, 4},
{ cat1, Pcat1, bcc1, 1, 5},
{ cat2, Pcat2, bcc2, 2, 7},
{ cat3, Pcat3, bcc3, 3, 11},
{ cat4, Pcat4, bcc4, 4, 19},
{ cat5, Pcat5, bcc5, 5, 35},
{ cat6, Pcat6, bcc6, 11, 67},
{ 0, 0, 0, 0, 0}
};
#include "defaultcoefcounts.h"

View File

@@ -24,10 +24,10 @@
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
#define vp8_coef_tokens 12
@@ -42,6 +42,7 @@ typedef struct
{
vp8_tree_p tree;
const vp8_prob *prob;
vp8bc_index_t *prob_bc;
int Len;
int base_val;
} vp8_extra_bit_struct;
@@ -94,7 +95,6 @@ struct VP8Common;
void vp8_default_coef_probs(struct VP8Common *);
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
extern short vp8_default_zig_zag_mask[16];
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];

View File

@@ -65,13 +65,11 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
rtcd->postproc.down = vp8_mbpost_proc_down_c;
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
rtcd->postproc.blend_mb_inner = vp8_blend_mb_inner_c;
rtcd->postproc.blend_mb_outer = vp8_blend_mb_outer_c;
rtcd->postproc.blend_b = vp8_blend_b_c;
rtcd->postproc.down = vp8_mbpost_proc_down_c;
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
rtcd->postproc.blend_mb = vp8_blend_mb_c;
#endif
#endif

View File

@@ -18,7 +18,6 @@ extern "C"
#endif
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vp8cx.h"
#include "vpx_scale/yv12config.h"
#include "type_aliases.h"
#include "ppflags.h"
@@ -190,8 +189,6 @@ extern "C"
struct vpx_fixed_buf two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
vp8e_tuning tuning;
} VP8_CONFIG;
@@ -207,7 +204,7 @@ extern "C"
// and not just a copy of the pointer..
int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time_stamp);
int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush);
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags);
int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags);
int vp8_update_reference(VP8_PTR comp, int ref_frame_flags);

View File

@@ -105,7 +105,7 @@ typedef struct VP8Common
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG temp_scale_frame;
FRAME_TYPE last_frame_type; /* Save last frame's frame type for loopfilter init checking and motion search. */
FRAME_TYPE last_frame_type; /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
FRAME_TYPE frame_type;
int show_frame;

View File

@@ -51,7 +51,7 @@ extern "C"
int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst);
int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, INT64 time_stamp);
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags);
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags);
int vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
int vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);

View File

@@ -26,7 +26,7 @@
( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
/* global constants */
#if CONFIG_POSTPROC_VISUALIZER
static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
{
{ RGB_TO_YUV(0x98FB98) }, /* PaleGreen */
@@ -41,32 +41,13 @@ static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
{ RGB_TO_YUV(0xFF0000) } /* Red */
};
static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] =
{
{ RGB_TO_YUV(0x6633ff) }, /* Purple */
{ RGB_TO_YUV(0xcc33ff) }, /* Magenta */
{ RGB_TO_YUV(0xff33cc) }, /* Pink */
{ RGB_TO_YUV(0xff3366) }, /* Coral */
{ RGB_TO_YUV(0x3366ff) }, /* Blue */
{ RGB_TO_YUV(0xed00f5) }, /* Dark Blue */
{ RGB_TO_YUV(0x2e00b8) }, /* Dark Purple */
{ RGB_TO_YUV(0xff6633) }, /* Orange */
{ RGB_TO_YUV(0x33ccff) }, /* Light Blue */
{ RGB_TO_YUV(0x8ab800) }, /* Green */
{ RGB_TO_YUV(0xffcc33) }, /* Light Orange */
{ RGB_TO_YUV(0x33ffcc) }, /* Aqua */
{ RGB_TO_YUV(0x66ff33) }, /* Light Green */
{ RGB_TO_YUV(0xccff33) }, /* Yellow */
};
static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] =
/* Overlay colours (Y, U, V) looked up by a macroblock's reference frame. */
static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
{
{ RGB_TO_YUV(0x00ff00) }, /* Green */
{ RGB_TO_YUV(0x0000ff) }, /* Blue */
{ RGB_TO_YUV(0xffff00) }, /* Yellow */
{ RGB_TO_YUV(0xff0000) }, /* Red */
};
#endif
static const short kernel5[] =
{
@@ -495,7 +476,7 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
* edges unblended to give distinction to macro blocks in areas
* filled with the same color block.
*/
void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
int y1, int u1, int v1, int alpha, int stride)
{
int i, j;
@@ -503,10 +484,10 @@ void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
int u1_const = u1*((1<<16)-alpha);
int v1_const = v1*((1<<16)-alpha);
y += 2*stride + 2;
for (i = 0; i < 12; i++)
y += stride + 2;
for (i = 0; i < 14; i++)
{
for (j = 0; j < 12; j++)
for (j = 0; j < 14; j++)
{
y[j] = (y[j]*alpha + y1_const)>>16;
}
@@ -530,104 +511,6 @@ void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
}
}
/* Blend a colour into the border of one macroblock, leaving the centre
 * untouched so that other visualizations can be layered on top.
 *
 * The luma border is two pixels wide around the 16x16 block; the chroma
 * border is one pixel wide around each 8x8 block.  alpha is the 16-bit
 * fixed-point weight kept from the existing pixel; the remainder
 * ((1<<16) - alpha) is taken from the colour (y1, u1, v1).  stride is the
 * luma stride; the chroma planes use stride >> 1.
 */
void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v,
                           int y1, int u1, int v1, int alpha, int stride)
{
    const int luma_term     = y1*((1<<16)-alpha);
    const int cb_term       = u1*((1<<16)-alpha);
    const int cr_term       = v1*((1<<16)-alpha);
    const int chroma_stride = stride >> 1;
    int row, col;

    /* Luma plane: rows 0-1 and 14-15 are blended across their full width,
     * the rows in between only at columns 0, 1, 14 and 15.
     */
    for (row = 0; row < 16; row++)
    {
        unsigned char *line = y + row * stride;

        if (row < 2 || row >= 14)
        {
            for (col = 0; col < 16; col++)
                line[col] = (line[col]*alpha + luma_term)>>16;
        }
        else
        {
            line[0]  = (line[0]*alpha  + luma_term)>>16;
            line[1]  = (line[1]*alpha  + luma_term)>>16;
            line[14] = (line[14]*alpha + luma_term)>>16;
            line[15] = (line[15]*alpha + luma_term)>>16;
        }
    }

    /* Chroma planes: rows 0 and 7 in full, the rows in between only at
     * columns 0 and 7.
     */
    for (row = 0; row < 8; row++)
    {
        unsigned char *u_line = u + row * chroma_stride;
        unsigned char *v_line = v + row * chroma_stride;

        if (row == 0 || row == 7)
        {
            for (col = 0; col < 8; col++)
            {
                u_line[col] = (u_line[col]*alpha + cb_term)>>16;
                v_line[col] = (v_line[col]*alpha + cr_term)>>16;
            }
        }
        else
        {
            u_line[0] = (u_line[0]*alpha + cb_term)>>16;
            v_line[0] = (v_line[0]*alpha + cr_term)>>16;
            u_line[7] = (u_line[7]*alpha + cb_term)>>16;
            v_line[7] = (v_line[7]*alpha + cr_term)>>16;
        }
    }
}
/* Blend a colour into one 4x4 luma block and the matching 2x2 chroma
 * blocks.
 *
 * alpha is the 16-bit fixed-point weight kept from the existing pixel; the
 * remainder ((1<<16) - alpha) is taken from the colour (y1, u1, v1).
 * stride is the luma stride; the chroma planes use stride >> 1.
 */
void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v,
                    int y1, int u1, int v1, int alpha, int stride)
{
    const int luma_term     = y1*((1<<16)-alpha);
    const int cb_term       = u1*((1<<16)-alpha);
    const int cr_term       = v1*((1<<16)-alpha);
    const int chroma_stride = stride >> 1;
    int row, col;

    for (row = 0; row < 4; row++)
    {
        unsigned char *line = y + row * stride;

        for (col = 0; col < 4; col++)
            line[col] = (line[col]*alpha + luma_term)>>16;
    }

    for (row = 0; row < 2; row++)
    {
        unsigned char *u_line = u + row * chroma_stride;
        unsigned char *v_line = v + row * chroma_stride;

        for (col = 0; col < 2; col++)
        {
            u_line[col] = (u_line[col]*alpha + cb_term)>>16;
            v_line[col] = (v_line[col]*alpha + cr_term)>>16;
        }
    }
}
static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
{
int dx;
@@ -639,7 +522,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
dy = *y1 - y0;
*x1 = width;
if (dx)
if (dy)
*y1 = ((width-x0)*dy)/dx + y0;
}
if (*x1 < 0)
@@ -648,7 +531,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
dy = *y1 - y0;
*x1 = 0;
if (dx)
if (dy)
*y1 = ((0-x0)*dy)/dx + y0;
}
if (*y1 > height)
@@ -657,7 +540,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
dy = *y1 - y0;
*y1 = height;
if (dy)
if (dx)
*x1 = ((height-y0)*dx)/dy + x0;
}
if (*y1 < 0)
@@ -666,7 +549,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
dy = *y1 - y0;
*y1 = 0;
if (dy)
if (dx)
*x1 = ((0-y0)*dx)/dy + x0;
}
}
@@ -678,13 +561,10 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
#define RTCD_VTABLE(oci) NULL
#endif
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags)
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
{
char message[512];
int q = oci->filter_level * 10 / 6;
int flags = ppflags->post_proc_flag;
int deblock_level = ppflags->deblocking_level;
int noise_level = ppflags->noise_level;
if (!oci->frame_to_show)
return -1;
@@ -741,8 +621,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
oci->post_proc_buffer.y_stride);
}
#if CONFIG_POSTPROC_VISUALIZER
if (flags & VP8D_DEBUG_TXT_FRAME_INFO)
if (flags & VP8D_DEBUG_LEVEL1)
{
sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
(oci->frame_type == KEY_FRAME),
@@ -754,7 +633,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
}
if (flags & VP8D_DEBUG_TXT_MBLK_MODES)
if (flags & VP8D_DEBUG_LEVEL2)
{
int i, j;
unsigned char *y_ptr;
@@ -786,7 +665,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
}
}
if (flags & VP8D_DEBUG_TXT_DC_DIFF)
if (flags & VP8D_DEBUG_LEVEL3)
{
int i, j;
unsigned char *y_ptr;
@@ -821,14 +700,45 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
}
}
if (flags & VP8D_DEBUG_TXT_RATE_INFO)
if (flags & VP8D_DEBUG_LEVEL4)
{
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
#if 0
int i, j;
unsigned char *y_ptr;
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
int mb_rows = post->y_height >> 4;
int mb_cols = post->y_width >> 4;
int mb_index = 0;
MODE_INFO *mi = oci->mi;
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
/* vp8_filter each macro block */
for (i = 0; i < mb_rows; i++)
{
for (j = 0; j < mb_cols; j++)
{
char zz[4];
sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0');
vp8_blit_text(zz, y_ptr, post->y_stride);
mb_index ++;
y_ptr += 16;
}
mb_index ++; /* border */
y_ptr += post->y_stride * 16 - post->y_width;
}
#endif
}
/* Draw motion vectors */
if ((flags & VP8D_DEBUG_DRAW_MV) && ppflags->display_mv_flag)
if (flags & VP8D_DEBUG_LEVEL5)
{
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
int width = post->y_width;
@@ -839,144 +749,29 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
MODE_INFO *mi = oci->mi;
int x0, y0;
for (y0 = 0; y0 < height; y0 += 16)
for (y0 = 8; y0 < (height + 8); y0 += 16)
{
for (x0 = 0; x0 < width; x0 += 16)
for (x0 = 8; x0 < (width + 8); x0 += 16)
{
int x1, y1;
if (!(ppflags->display_mv_flag & (1<<mi->mbmi.mode)))
{
mi++;
continue;
}
if (mi->mbmi.mode == SPLITMV)
{
switch (mi->mbmi.partitioning)
{
case 0 : /* mv_top_bottom */
{
B_MODE_INFO *bmi = &mi->bmi[0];
MV *mv = &bmi->mv.as_mv;
x1 = x0 + 8 + (mv->col >> 3);
y1 = y0 + 4 + (mv->row >> 3);
constrain_line (x0+8, &x1, y0+4, &y1, width, height);
vp8_blit_line (x0+8, x1, y0+4, y1, y_buffer, y_stride);
bmi = &mi->bmi[8];
x1 = x0 + 8 + (mv->col >> 3);
y1 = y0 +12 + (mv->row >> 3);
constrain_line (x0+8, &x1, y0+12, &y1, width, height);
vp8_blit_line (x0+8, x1, y0+12, y1, y_buffer, y_stride);
break;
}
case 1 : /* mv_left_right */
{
B_MODE_INFO *bmi = &mi->bmi[0];
MV *mv = &bmi->mv.as_mv;
x1 = x0 + 4 + (mv->col >> 3);
y1 = y0 + 8 + (mv->row >> 3);
constrain_line (x0+4, &x1, y0+8, &y1, width, height);
vp8_blit_line (x0+4, x1, y0+8, y1, y_buffer, y_stride);
bmi = &mi->bmi[2];
x1 = x0 +12 + (mv->col >> 3);
y1 = y0 + 8 + (mv->row >> 3);
constrain_line (x0+12, &x1, y0+8, &y1, width, height);
vp8_blit_line (x0+12, x1, y0+8, y1, y_buffer, y_stride);
break;
}
case 2 : /* mv_quarters */
{
B_MODE_INFO *bmi = &mi->bmi[0];
MV *mv = &bmi->mv.as_mv;
x1 = x0 + 4 + (mv->col >> 3);
y1 = y0 + 4 + (mv->row >> 3);
constrain_line (x0+4, &x1, y0+4, &y1, width, height);
vp8_blit_line (x0+4, x1, y0+4, y1, y_buffer, y_stride);
bmi = &mi->bmi[2];
x1 = x0 +12 + (mv->col >> 3);
y1 = y0 + 4 + (mv->row >> 3);
constrain_line (x0+12, &x1, y0+4, &y1, width, height);
vp8_blit_line (x0+12, x1, y0+4, y1, y_buffer, y_stride);
bmi = &mi->bmi[8];
x1 = x0 + 4 + (mv->col >> 3);
y1 = y0 +12 + (mv->row >> 3);
constrain_line (x0+4, &x1, y0+12, &y1, width, height);
vp8_blit_line (x0+4, x1, y0+12, y1, y_buffer, y_stride);
bmi = &mi->bmi[10];
x1 = x0 +12 + (mv->col >> 3);
y1 = y0 +12 + (mv->row >> 3);
constrain_line (x0+12, &x1, y0+12, &y1, width, height);
vp8_blit_line (x0+12, x1, y0+12, y1, y_buffer, y_stride);
break;
}
default :
{
B_MODE_INFO *bmi = mi->bmi;
int bx0, by0;
for (by0 = y0; by0 < (y0+16); by0 += 4)
{
for (bx0 = x0; bx0 < (x0+16); bx0 += 4)
{
MV *mv = &bmi->mv.as_mv;
x1 = bx0 + 2 + (mv->col >> 3);
y1 = by0 + 2 + (mv->row >> 3);
constrain_line (bx0+2, &x1, by0+2, &y1, width, height);
vp8_blit_line (bx0+2, x1, by0+2, y1, y_buffer, y_stride);
bmi++;
}
}
}
}
}
else if (mi->mbmi.mode >= NEARESTMV)
int x1, y1;
if (mi->mbmi.mode >= NEARESTMV)
{
MV *mv = &mi->mbmi.mv.as_mv;
const int lx0 = x0 + 8;
const int ly0 = y0 + 8;
x1 = lx0 + (mv->col >> 3);
y1 = ly0 + (mv->row >> 3);
x1 = x0 + (mv->col >> 3);
y1 = y0 + (mv->row >> 3);
if (x1 != lx0 && y1 != ly0)
if (x1 != x0 && y1 != y0)
{
constrain_line (lx0, &x1, ly0-1, &y1, width, height);
vp8_blit_line (lx0, x1, ly0-1, y1, y_buffer, y_stride);
constrain_line (x0, &x1, y0-1, &y1, width, height);
vp8_blit_line (x0, x1, y0-1, y1, y_buffer, y_stride);
constrain_line (lx0, &x1, ly0+1, &y1, width, height);
vp8_blit_line (lx0, x1, ly0+1, y1, y_buffer, y_stride);
constrain_line (x0, &x1, y0+1, &y1, width, height);
vp8_blit_line (x0, x1, y0+1, y1, y_buffer, y_stride);
}
else
vp8_blit_line (lx0, x1, ly0, y1, y_buffer, y_stride);
vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride);
}
mi++;
}
mi++;
@@ -984,10 +779,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
}
/* Color in block modes */
if ((flags & VP8D_DEBUG_CLR_BLK_MODES)
&& (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag))
if (flags & VP8D_DEBUG_LEVEL6)
{
int y, x;
int i, j;
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
int width = post->y_width;
int height = post->y_height;
@@ -997,54 +791,18 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
int y_stride = oci->post_proc_buffer.y_stride;
MODE_INFO *mi = oci->mi;
for (y = 0; y < height; y += 16)
for (i = 0; i < height; i += 16)
{
for (x = 0; x < width; x += 16)
for (j = 0; j < width; j += 16)
{
int Y = 0, U = 0, V = 0;
if (mi->mbmi.mode == B_PRED &&
((ppflags->display_mb_modes_flag & B_PRED) || ppflags->display_b_modes_flag))
{
int by, bx;
unsigned char *yl, *ul, *vl;
B_MODE_INFO *bmi = mi->bmi;
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
yl = y_ptr + x;
ul = u_ptr + (x>>1);
vl = v_ptr + (x>>1);
for (by = 0; by < 16; by += 4)
{
for (bx = 0; bx < 16; bx += 4)
{
if ((ppflags->display_b_modes_flag & (1<<mi->mbmi.mode))
|| (ppflags->display_mb_modes_flag & B_PRED))
{
Y = B_PREDICTION_MODE_colors[bmi->mode][0];
U = B_PREDICTION_MODE_colors[bmi->mode][1];
V = B_PREDICTION_MODE_colors[bmi->mode][2];
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b)
(yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride);
}
bmi++;
}
yl += y_stride*4;
ul += y_stride*1;
vl += y_stride*1;
}
}
else if (ppflags->display_mb_modes_flag & (1<<mi->mbmi.mode))
{
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_inner)
(y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
}
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
mi++;
}
@@ -1057,9 +815,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
}
/* Color in frame reference blocks */
if ((flags & VP8D_DEBUG_CLR_FRM_REF_BLKS) && ppflags->display_ref_frame_flag)
if (flags & VP8D_DEBUG_LEVEL7)
{
int y, x;
int i, j;
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
int width = post->y_width;
int height = post->y_height;
@@ -1069,21 +827,18 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
int y_stride = oci->post_proc_buffer.y_stride;
MODE_INFO *mi = oci->mi;
for (y = 0; y < height; y += 16)
for (i = 0; i < height; i += 16)
{
for (x = 0; x < width; x +=16)
for (j = 0; j < width; j +=16)
{
int Y = 0, U = 0, V = 0;
if (ppflags->display_ref_frame_flag & (1<<mi->mbmi.ref_frame))
{
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer)
(y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
}
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
mi++;
}
@@ -1094,7 +849,6 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
mi++;
}
}
#endif
*dest = oci->post_proc_buffer;

View File

@@ -24,15 +24,7 @@
char whiteclamp[16], char bothclamp[16],\
unsigned int w, unsigned int h, int pitch)
#define prototype_postproc_blend_mb_inner(sym)\
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
int y1, int u1, int v1, int alpha, int stride)
#define prototype_postproc_blend_mb_outer(sym)\
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
int y1, int u1, int v1, int alpha, int stride)
#define prototype_postproc_blend_b(sym)\
#define prototype_postproc_blend_mb(sym)\
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
int y1, int u1, int v1, int alpha, int stride)
@@ -60,36 +52,22 @@ extern prototype_postproc(vp8_postproc_downacross);
#endif
extern prototype_postproc_addnoise(vp8_postproc_addnoise);
#ifndef vp8_postproc_blend_mb_inner
#define vp8_postproc_blend_mb_inner vp8_blend_mb_inner_c
#ifndef vp8_postproc_blend_mb
#define vp8_postproc_blend_mb vp8_blend_mb_c
#endif
extern prototype_postproc_blend_mb_inner(vp8_postproc_blend_mb_inner);
#ifndef vp8_postproc_blend_mb_outer
#define vp8_postproc_blend_mb_outer vp8_blend_mb_outer_c
#endif
extern prototype_postproc_blend_mb_outer(vp8_postproc_blend_mb_outer);
#ifndef vp8_postproc_blend_b
#define vp8_postproc_blend_b vp8_blend_b_c
#endif
extern prototype_postproc_blend_b(vp8_postproc_blend_b);
extern prototype_postproc_blend_mb(vp8_postproc_blend_mb);
typedef prototype_postproc((*vp8_postproc_fn_t));
typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
typedef prototype_postproc_blend_mb_inner((*vp8_postproc_blend_mb_inner_fn_t));
typedef prototype_postproc_blend_mb_outer((*vp8_postproc_blend_mb_outer_fn_t));
typedef prototype_postproc_blend_b((*vp8_postproc_blend_b_fn_t));
typedef prototype_postproc_blend_mb((*vp8_postproc_blend_mb_fn_t));
typedef struct
{
vp8_postproc_inplace_fn_t down;
vp8_postproc_inplace_fn_t across;
vp8_postproc_fn_t downacross;
vp8_postproc_addnoise_fn_t addnoise;
vp8_postproc_blend_mb_inner_fn_t blend_mb_inner;
vp8_postproc_blend_mb_outer_fn_t blend_mb_outer;
vp8_postproc_blend_b_fn_t blend_b;
vp8_postproc_inplace_fn_t down;
vp8_postproc_inplace_fn_t across;
vp8_postproc_fn_t downacross;
vp8_postproc_addnoise_fn_t addnoise;
vp8_postproc_blend_mb_fn_t blend_mb;
} vp8_postproc_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
@@ -111,7 +89,7 @@ struct postproc_state
#include "onyxc_int.h"
#include "ppflags.h"
int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
vp8_ppflags_t *flags);
int deblock_level, int noise_level, int flags);
void vp8_de_noise(YV12_BUFFER_CONFIG *source,

View File

@@ -13,28 +13,17 @@
#define __INC_PPFLAGS_H
enum
{
VP8D_NOFILTERING = 0,
VP8D_DEBLOCK = 1<<0,
VP8D_DEMACROBLOCK = 1<<1,
VP8D_ADDNOISE = 1<<2,
VP8D_DEBUG_TXT_FRAME_INFO = 1<<3,
VP8D_DEBUG_TXT_MBLK_MODES = 1<<4,
VP8D_DEBUG_TXT_DC_DIFF = 1<<5,
VP8D_DEBUG_TXT_RATE_INFO = 1<<6,
VP8D_DEBUG_DRAW_MV = 1<<7,
VP8D_DEBUG_CLR_BLK_MODES = 1<<8,
VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9
VP8D_NOFILTERING = 0,
VP8D_DEBLOCK = 1<<0,
VP8D_DEMACROBLOCK = 1<<1,
VP8D_ADDNOISE = 1<<2,
VP8D_DEBUG_LEVEL1 = 1<<3,
VP8D_DEBUG_LEVEL2 = 1<<4,
VP8D_DEBUG_LEVEL3 = 1<<5,
VP8D_DEBUG_LEVEL4 = 1<<6,
VP8D_DEBUG_LEVEL5 = 1<<7,
VP8D_DEBUG_LEVEL6 = 1<<8,
VP8D_DEBUG_LEVEL7 = 1<<9
};
typedef struct
{
int post_proc_flag;
int deblocking_level;
int noise_level;
int display_ref_frame_flag;
int display_mb_modes_flag;
int display_b_modes_flag;
int display_mv_flag;
} vp8_ppflags_t;
#endif

46
vp8/common/preproc.h Normal file
View File

@@ -0,0 +1,46 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/****************************************************************************
*
* Module Title : preproc.h
*
* Description : simple preprocessor
*
****************************************************************************/
#ifndef __INC_PREPROC_H
#define __INC_PREPROC_H
/****************************************************************************
* Types
****************************************************************************/
/* Per-instance state of the simple preprocessor declared in this header.
 * A pointer to this struct is the first argument of init_pre_proc(),
 * delete_pre_proc(), spatial_filter_c() and the temp_filter hook.
 */
typedef struct
{
/* Byte buffer used by the preprocessor.
 * NOTE(review): presumably an (aligned) view into frame_buffer_alloc - confirm. */
unsigned char *frame_buffer;
/* NOTE(review): looks like a frame counter/index; exact meaning is not visible here. */
int frame;
/* Table of unsigned ints.
 * NOTE(review): presumably precomputed fixed-point divisors - confirm. */
unsigned int *fixed_divide;
/* NOTE(review): presumably the raw allocation backing frame_buffer - confirm. */
unsigned char *frame_buffer_alloc;
/* NOTE(review): presumably the raw allocation backing fixed_divide - confirm. */
unsigned int *fixed_divide_alloc;
} pre_proc_instance;
/****************************************************************************
* Functions.
****************************************************************************/
/* Instance-independent configuration hook; takes no arguments.
 * NOTE(review): presumably selects the temp_filter implementation for the
 * host CPU - confirm against the definition. */
void pre_proc_machine_specific_config(void);

/* Initialise *ppi for the given frame_size and return an int status.
 * NOTE(review): the unit of frame_size and the success value are not
 * visible in this header - confirm. */
int init_pre_proc(pre_proc_instance *ppi,
                  int frame_size);

/* Counterpart of init_pre_proc() for the same instance.
 * NOTE(review): presumably releases the *_alloc buffers - confirm. */
void delete_pre_proc(pre_proc_instance *ppi);

/* C implementation of the spatial filter: reads from s, writes to d,
 * for a width x height area with the given pitch and filter strength.
 * NOTE(review): source/destination roles are inferred from the names
 * s and d - confirm. */
void spatial_filter_c(pre_proc_instance *ppi,
                      unsigned char *s,
                      unsigned char *d,
                      int width,
                      int height,
                      int pitch,
                      int strength);

/* Temporal filter hook: a function pointer that is defined elsewhere
 * (hence the extern), operating on `bytes` bytes of s and d. */
extern void (*temp_filter)(pre_proc_instance *ppi,
                           unsigned char *s,
                           unsigned char *d,
                           int bytes,
                           int strength);
#endif

76
vp8/common/preprocif.h Normal file
View File

@@ -0,0 +1,76 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/****************************************************************************
*
* Module Title : preprocif.h
*
* Description : Pre-processor interface header file.
*
****************************************************************************/
#ifndef __PREPROC_IF_H
#define __PREPROC_IF_H
/****************************************************************************
* Header Files
****************************************************************************/
#include "type_aliases.h"
/****************************************************************************
* Types
****************************************************************************/
/* Scan configuration block; a pointer to it is the second argument of
 * scan_yuvinit(). The fixed-width types come from type_aliases.h.
 */
typedef struct
{
/* NOTE(review): presumably pointers to two YUV frame buffers - confirm which is which. */
UINT8 *Yuv0ptr;
UINT8 *Yuv1ptr;
UINT8 *frag_info; // blocks coded : passed in
UINT32 frag_info_element_size; // size of each element
UINT32 frag_info_coded_mask; // mask to get at whether fragment is coded
UINT32 *region_index; // Gives pixel index for top left of each block
/* NOTE(review): frame dimensions, presumably in pixels - confirm. */
UINT32 video_frame_height;
UINT32 video_frame_width;
/* NOTE(review): presumably fragment width / height in pixels - confirm. */
UINT8 hfrag_pixels;
UINT8 vfrag_pixels;
} SCAN_CONFIG_DATA;
/* Identifiers for the individual pre-processor settings.
 * The numeric values are written out explicitly; they are the same values
 * the compiler assigns implicitly (0, 1, 2, ...).
 * NOTE(review): presumably these are the param_id values accepted by
 * set_scan_param() - confirm against the implementation.
 */
typedef enum
{
    SCP_FILTER_ON_OFF         = 0,
    SCP_SET_SRF_OFFSET        = 1,
    SCP_SET_EBO_ON_OFF        = 2,
    SCP_SET_VCAP_LEVEL_OFFSET = 3,
    SCP_SET_SHOW_LOCAL        = 4
} SCP_SETTINGS;
/* Opaque instance handle: a pointer to struct PP_INSTANCE, whose layout is
 * not defined in this header. Returned by create_pp_instance() and passed
 * to the other functions declared below. */
typedef struct PP_INSTANCE *x_pp_inst;
/****************************************************************************
* Module statics
****************************************************************************/
/* Default state of early breakout: TRUE means it is on unless changed.
 * NOTE(review): TRUE is not defined in this header - presumably it comes
 * from type_aliases.h; SCP_SET_EBO_ON_OFF looks like the matching runtime
 * setting - confirm. */
#define EARLY_BREAKOUT_DEFAULT TRUE
/****************************************************************************
* Functions
****************************************************************************/
/* Instance lifetime: create_pp_instance() returns a handle;
 * delete_pp_instance() takes the address of a handle.
 * NOTE(review): presumably the handle is cleared on delete - confirm. */
x_pp_inst create_pp_instance(void);
void delete_pp_instance(x_pp_inst *);

/* Initialise scanning for an instance from a SCAN_CONFIG_DATA block;
 * returns a BOOL. NOTE(review): presumably TRUE on success - confirm. */
BOOL scan_yuvinit(x_pp_inst,
                  SCAN_CONFIG_DATA *scan_config_ptr);

/* Set one parameter, selected by param_id, to param_value. */
void set_scan_param(x_pp_inst ppi,
                    UINT32 param_id,
                    INT32 param_value);

/* Analyse a frame for the instance; returns a UINT32 and writes through
 * KFIndicator. NOTE(review): meaning of the return value and of
 * *KFIndicator (key-frame hint?) is not visible here - confirm. */
UINT32 yuvanalyse_frame(x_pp_inst ppi,
                        UINT32 *KFIndicator);
#endif

View File

@@ -19,7 +19,7 @@
extern void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd);
extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
extern void vp8_decoder_create_threads(VP8D_COMP *pbi);
extern void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
extern int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
extern void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows);
#endif

View File

@@ -506,7 +506,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
pbi->common.error.setjmp = 0;
return retcode;
}
int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags)
int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags)
{
int ret = -1;
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
@@ -524,7 +524,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
sd->clrtype = pbi->common.clr_type;
#if CONFIG_POSTPROC
ret = vp8_post_proc_frame(&pbi->common, sd, flags);
ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags);
#else
if (pbi->common.frame_to_show)

View File

@@ -596,7 +596,7 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
}
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
{
#if CONFIG_MULTITHREAD
VP8_COMMON *const pc = & pbi->common;
@@ -647,6 +647,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
for (i=0; i< pc->mb_rows; i++)
CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
}
return 0;
#else
(void) pbi;
(void) width;

View File

@@ -29,9 +29,10 @@
push {r4-r11, lr}
; Add size of xcount * sizeof (TOKENEXTRA) to get stop
; sizeof (TOKENEXTRA) is 8
; sizeof (TOKENEXTRA) is 20
add r2, r2, r2, lsl #2 ; xcount
sub sp, sp, #12
add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA)
add r2, r1, r2, lsl #2 ; stop = p + xcount
str r2, [sp, #0]
str r3, [sp, #8] ; save vp8_coef_encodings
ldr r2, [r0, #vp8_writer_lowvalue]
@@ -40,13 +41,13 @@
b check_p_lt_stop
while_p_lt_stop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r6, [r1, #tokenextra_token] ; t
ldr r4, [sp, #8] ; vp8_coef_encodings
mov lr, #0
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
ldr r9, [r1, #tokenextra_context_tree] ; pp
ldrb r7, [r1, #tokenextra_skip_eob_node]
ldr r7, [r1, #tokenextra_skip_eob_node]
ldr r6, [r4, #vp8_token_value] ; v
ldr r8, [r4, #vp8_token_len] ; n
@@ -141,11 +142,12 @@ token_count_lt_zero
subs r8, r8, #1 ; --n
bne token_loop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r6, [r1, #tokenextra_token] ; t
ldr r7, [sp, #48] ; vp8_extra_bits
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
; element. Here vp8_extra_bit_struct == 16
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
; element. Here vp8_extra_bit_struct == 20
add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t
add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
cmp r4, #0
@@ -153,7 +155,7 @@ token_count_lt_zero
; if( b->base_val)
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
ldr lr, [r1, #tokenextra_extra] ; e = p->Extra
cmp r8, #0 ; if( L)
beq no_extra_bits

View File

@@ -62,13 +62,13 @@ mb_row_loop
; actual work gets done here!
while_p_lt_stop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r6, [r1, #tokenextra_token] ; t
ldr r4, [sp, #20] ; vp8_coef_encodings
mov lr, #0
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
ldr r9, [r1, #tokenextra_context_tree] ; pp
ldrb r7, [r1, #tokenextra_skip_eob_node]
ldr r7, [r1, #tokenextra_skip_eob_node]
ldr r6, [r4, #vp8_token_value] ; v
ldr r8, [r4, #vp8_token_len] ; n
@@ -163,11 +163,12 @@ token_count_lt_zero
subs r8, r8, #1 ; --n
bne token_loop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r6, [r1, #tokenextra_token] ; t
ldr r7, [sp, #8] ; vp8_extra_bits
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
; element. Here vp8_extra_bit_struct == 16
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
; element. Here vp8_extra_bit_struct == 20
add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t
add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
cmp r4, #0
@@ -175,7 +176,7 @@ token_count_lt_zero
; if( b->base_val)
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
ldr lr, [r1, #tokenextra_extra] ; e = p->Extra
cmp r8, #0 ; if( L)
beq no_extra_bits

View File

@@ -90,13 +90,13 @@ mb_row_loop
; actual work gets done here!
while_p_lt_stop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r6, [r1, #tokenextra_token] ; t
ldr r4, [sp, #80] ; vp8_coef_encodings
mov lr, #0
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
ldr r9, [r1, #tokenextra_context_tree] ; pp
ldrb r7, [r1, #tokenextra_skip_eob_node]
ldr r7, [r1, #tokenextra_skip_eob_node]
ldr r6, [r4, #vp8_token_value] ; v
ldr r8, [r4, #vp8_token_len] ; n
@@ -191,11 +191,12 @@ token_count_lt_zero
subs r8, r8, #1 ; --n
bne token_loop
ldrb r6, [r1, #tokenextra_token] ; t
ldr r6, [r1, #tokenextra_token] ; t
ldr r7, [sp, #84] ; vp8_extra_bits
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
; element. Here vp8_extra_bit_struct == 16
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
; element. Here vp8_extra_bit_struct == 20
add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t
add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
cmp r4, #0
@@ -203,7 +204,7 @@ token_count_lt_zero
; if( b->base_val)
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
ldr lr, [r1, #tokenextra_extra] ; e = p->Extra
cmp r8, #0 ; if( L)
beq no_extra_bits

View File

@@ -29,7 +29,7 @@ extern int vp8_fast_quantize_b_neon_func(short *coeff_ptr, short *zbin_ptr, shor
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
{
d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant_fast);
d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant);
}
/*

View File

@@ -51,6 +51,7 @@ DEFINE(vp8_token_len, offsetof(vp8_token, Len));
DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree));
DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob));
DEFINE(vp8_extra_bit_struct_prob_bc, offsetof(vp8_extra_bit_struct, prob_bc));
DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len));
DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val));
@@ -66,8 +67,8 @@ DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
// These two sizes are used in vp7cx_pack_tokens. They are hard coded
// so if the size changes this will have to be adjusted.
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 20)
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 20)
//add asserts for any offset that is not supported by assembly code
//add asserts for any size that is not supported by assembly code

View File

@@ -33,7 +33,6 @@ typedef struct
// 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
short *quant;
short *quant_fast;
short *quant_shift;
short *zbin;
short *zrun_zbin_boost;
@@ -82,7 +81,6 @@ typedef struct
int errthresh;
int rddiv;
int rdmult;
INT64 activity_sum;
int mvcosts[2][MVvals+1];
int *mvcost[2];

View File

@@ -62,6 +62,7 @@ unsigned int b_modes[14] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
static const int qrounding_factors[129] =
{
56, 56, 56, 56, 48, 48, 56, 56,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
@@ -77,18 +78,12 @@ static const int qrounding_factors[129] =
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48
48,
};
static const int qzbin_factors[129] =
{
84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84,
72, 72, 72, 72, 80, 80, 72, 72,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
@@ -99,11 +94,17 @@ static const int qzbin_factors[129] =
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80,
};
static const int qrounding_factors_y2[129] =
{
56, 56, 56, 56, 48, 48, 56, 56,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
@@ -119,18 +120,12 @@ static const int qrounding_factors_y2[129] =
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48,
48
48,
};
static const int qzbin_factors_y2[129] =
{
84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84,
72, 72, 72, 72, 80, 80, 72, 72,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
@@ -141,30 +136,26 @@ static const int qzbin_factors_y2[129] =
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80,
};
#define EXACT_QUANT
//#define EXACT_QUANT
#ifdef EXACT_QUANT
static void vp8cx_invert_quant(int improved_quant, short *quant,
short *shift, short d)
static void vp8cx_invert_quant(short *quant, short *shift, short d)
{
if(improved_quant)
{
unsigned t;
int l;
t = d;
for(l = 0; t > 1; l++)
t>>=1;
t = 1 + (1<<(16+l))/d;
*quant = (short)(t - (1<<16));
*shift = l;
}
else
{
*quant = (1 << 16) / d;
*shift = 0;
}
unsigned t;
int l;
t = d;
for(l = 0; t > 1; l++)
t>>=1;
t = 1 + (1<<(16+l))/d;
*quant = (short)(t - (1<<16));
*shift = l;
}
void vp8cx_init_quantizer(VP8_COMP *cpi)
@@ -179,8 +170,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
{
// dc values
quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
vp8cx_invert_quant(cpi->Y1quant[Q] + 0,
cpi->Y1quant_shift[Q] + 0, quant_val);
cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -188,8 +178,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
vp8cx_invert_quant(cpi->Y2quant[Q] + 0,
cpi->Y2quant_shift[Q] + 0, quant_val);
cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
@@ -197,8 +186,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
vp8cx_invert_quant(cpi->UVquant[Q] + 0,
cpi->UVquant_shift[Q] + 0, quant_val);
cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -211,8 +199,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
int rc = vp8_default_zig_zag1d[i];
quant_val = vp8_ac_yquant(Q);
cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
vp8cx_invert_quant(cpi->Y1quant[Q] + rc,
cpi->Y1quant_shift[Q] + rc, quant_val);
cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -220,8 +207,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
vp8cx_invert_quant(cpi->Y2quant[Q] + rc,
cpi->Y2quant_shift[Q] + rc, quant_val);
cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
@@ -229,8 +215,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
vp8cx_invert_quant(cpi->UVquant[Q] + rc,
cpi->UVquant_shift[Q] + rc, quant_val);
cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -331,7 +316,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
for (i = 0; i < 16; i++)
{
x->block[i].quant = cpi->Y1quant[QIndex];
x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
x->block[i].zbin = cpi->Y1zbin[QIndex];
x->block[i].round = cpi->Y1round[QIndex];
@@ -346,7 +330,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
for (i = 16; i < 24; i++)
{
x->block[i].quant = cpi->UVquant[QIndex];
x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
x->block[i].zbin = cpi->UVzbin[QIndex];
x->block[i].round = cpi->UVround[QIndex];
@@ -357,7 +340,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
// Y2
zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
x->block[24].quant = cpi->Y2quant[QIndex];
x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
x->block[24].zbin = cpi->Y2zbin[QIndex];
@@ -369,9 +351,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
{
// Clear Zbin mode boost for default case
cpi->zbin_mode_boost = 0;
// vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called
// when these values are not all zero.
if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
@@ -384,62 +363,6 @@ void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
}
/* activity_avg must be positive, or flat regions could get a zero weight
* (infinite lambda), which confounds analysis.
* This also avoids the need for divide by zero checks in
* vp8_activity_masking().
*/
#define VP8_ACTIVITY_AVG_MIN (64)
/* This is used as a reference when computing the source variance for the
* purposes of activity masking.
* Eventually this should be replaced by custom no-reference routines,
* which will be faster.
*/
static const unsigned char VP8_VAR_OFFS[16]=
{
128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
};
unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
unsigned int act;
unsigned int sse;
int sum;
unsigned int a;
unsigned int b;
unsigned int d;
/* TODO: This could also be done over smaller areas (8x8), but that would
* require extensive changes elsewhere, as lambda is assumed to be fixed
* over an entire MB in most of the code.
* Another option is to compute four 8x8 variances, and pick a single
* lambda using a non-linear combination (e.g., the smallest, or second
* smallest, etc.).
*/
VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
/* This requires a full 32 bits of precision. */
act = (sse<<8) - sum*sum;
/* Drop 4 to give us some headroom to work with. */
act = (act + 8) >> 4;
/* If the region is flat, lower the activity some more. */
if (act < 8<<12)
act = act < 5<<12 ? act : 5<<12;
/* TODO: For non-flat regions, edge regions should receive less masking
* than textured regions, but identifying edge regions quickly and
* reliably enough is still a subject of experimentation.
* This will be most noticable near edges with a complex shape (e.g.,
* text), but the 4x4 transform size should make this less of a problem
* than it would be for an 8x8 transform.
*/
/* Apply the masking to the RD multiplier. */
a = act + 4*cpi->activity_avg;
b = 4*act + cpi->activity_avg;
x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
return act;
}
static
void encode_mb_row(VP8_COMP *cpi,
@@ -451,7 +374,6 @@ void encode_mb_row(VP8_COMP *cpi,
int *segment_counts,
int *totalrate)
{
INT64 activity_sum = 0;
int i;
int recon_yoffset, recon_uvoffset;
int mb_col;
@@ -480,14 +402,14 @@ void encode_mb_row(VP8_COMP *cpi,
// Set up limit values for vertical motion vector components
// to prevent them extending beyond the UMV borders
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
+ (VP8BORDERINPIXELS - 16);
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
// Distance of Mb to the left & right edges, specified in
// 1/8th pel units as they are always compared to values
// Distance of Mb to the left & right edges, specified in
// 1/8th pel units as they are always compared to values
// that are in 1/8th pel units
xd->mb_to_left_edge = -((mb_col * 16) << 3);
xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
@@ -495,7 +417,7 @@ void encode_mb_row(VP8_COMP *cpi,
// Set up limit values for horizontal motion vector components
// to prevent them extending beyond the UMV borders
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
+ (VP8BORDERINPIXELS - 16);
xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
@@ -503,12 +425,6 @@ void encode_mb_row(VP8_COMP *cpi,
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
x->rddiv = cpi->RDDIV;
x->rdmult = cpi->RDMULT;
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
activity_sum += vp8_activity_masking(cpi, x);
// Is segmentation enabled
// MB level adjustment to quantizer
if (xd->segmentation_enabled)
@@ -615,7 +531,6 @@ void encode_mb_row(VP8_COMP *cpi,
// this is to account for the border
xd->mode_info_context++;
x->partition_info++;
x->activity_sum += activity_sum;
}
@@ -732,7 +647,8 @@ void vp8_encode_frame(VP8_COMP *cpi)
vp8_setup_block_ptrs(x);
x->activity_sum = 0;
x->rddiv = cpi->RDDIV;
x->rdmult = cpi->RDMULT;
#if 0
// Experimental rd code
@@ -787,12 +703,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
else
{
#if CONFIG_MULTITHREAD
int i;
vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
{
int i;
cpi->current_mb_col_main = -1;
for (i = 0; i < cpi->encoding_thread_count; i++)
@@ -870,11 +785,6 @@ void vp8_encode_frame(VP8_COMP *cpi)
totalrate += cpi->mb_row_ei[i].totalrate;
}
for (i = 0; i < cpi->encoding_thread_count; i++)
{
x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
}
#endif
}
@@ -1010,14 +920,6 @@ void vp8_encode_frame(VP8_COMP *cpi)
cpi->last_frame_distortion = cpi->frame_distortion;
#endif
/* Update the average activity for the next frame.
* This is feed-forward for now; it could also be saved in two-pass, or
* done during lookahead when that is eventually added.
*/
cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
}
void vp8_setup_block_ptrs(MACROBLOCK *x)
{
@@ -1279,18 +1181,7 @@ int vp8cx_encode_inter_macroblock
if (cpi->sf.RD)
{
/* Are we using the fast quantizer for the mode selection? */
if(cpi->sf.use_fastquant_for_pick)
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
/* switch back to the regular quantizer for the encode */
if (cpi->sf.improved_quant)
{
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
}
}
else
#endif
@@ -1323,25 +1214,11 @@ int vp8cx_encode_inter_macroblock
// Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to suppress noise
if (cpi->zbin_mode_boost_enabled)
{
if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
cpi->zbin_mode_boost = 0;
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME))
cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
else
{
if (xd->mode_info_context->mbmi.mode == ZEROMV)
{
if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
else
cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
}
else if (xd->mode_info_context->mbmi.mode == SPLITMV)
cpi->zbin_mode_boost = 0;
else
cpi->zbin_mode_boost = MV_ZBIN_BOOST;
}
cpi->zbin_mode_boost = 0;
}
else
cpi->zbin_mode_boost = 0;
vp8cx_mb_init_quantizer(cpi, x);
}

View File

@@ -105,7 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
#if !(CONFIG_REALTIME_ONLY)
#if 1
if (x->optimize)
if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
vp8_optimize_mby(x, rtcd);
#endif

View File

@@ -243,9 +243,9 @@ struct vp8_token_state{
};
// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2
#define Y2_RD_MULT 16
#define Y1_RD_MULT 1
#define UV_RD_MULT 1
#define Y2_RD_MULT 4
static const int plane_rd_mult[4]=
{
@@ -309,10 +309,8 @@ void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
eob = d->eob;
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
rdmult = mb->rdmult * err_mult;
if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME)
rdmult = (rdmult * 9)>>4;
/* TODO: These should vary with the block type, since the quantizer does. */
rdmult = (mb->rdmult << 2)*err_mult;
rddiv = mb->rddiv;
best_mask[0] = best_mask[1] = 0;
/* Initialize the sentinel node of the trellis. */
@@ -635,7 +633,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_quantize_mb(x);
#if !(CONFIG_REALTIME_ONLY)
if (x->optimize)
if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
vp8_optimize_mb(x, rtcd);
#endif

View File

@@ -61,7 +61,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
volatile int *last_row_current_mb_col;
INT64 activity_sum = 0;
if (ithread > 0)
last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col;
@@ -112,12 +111,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
xd->left_available = (mb_col != 0);
x->rddiv = cpi->RDDIV;
x->rdmult = cpi->RDMULT;
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
activity_sum += vp8_activity_masking(cpi, x);
// Is segmentation enabled
// MB level adjustment to quantizer
if (xd->segmentation_enabled)
@@ -133,7 +126,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
else
xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default
x->active_ptr = cpi->active_map + seg_map_index + mb_col;
if (cm->frame_type == KEY_FRAME)
{
@@ -165,28 +157,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
cpi->inter_zz_count ++;
// Special case code for cyclic refresh
// If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
// during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
{
cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
// If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
// Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
// else mark it as dirty (1).
if (xd->mode_info_context->mbmi.segment_id)
cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
{
if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
}
else
cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;
}
}
cpi->tplist[mb_row].stop = *tp;
x->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb
@@ -225,7 +197,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
// this is to account for the border
xd->mode_info_context++;
x->partition_info++;
x->activity_sum += activity_sum;
x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
@@ -269,6 +240,8 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->sadperbit16 = x->sadperbit16;
z->sadperbit4 = x->sadperbit4;
z->errthresh = x->errthresh;
z->rddiv = x->rddiv;
z->rdmult = x->rdmult;
/*
z->mv_col_min = x->mv_col_min;
@@ -282,7 +255,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->vp8_short_fdct8x4 = x->vp8_short_fdct8x4;
z->short_walsh4x4 = x->short_walsh4x4;
z->quantize_b = x->quantize_b;
z->optimize = x->optimize;
/*
z->mvc = x->mvc;
@@ -310,7 +282,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
for (i = 0; i < 25; i++)
{
z->block[i].quant = x->block[i].quant;
z->block[i].quant_fast = x->block[i].quant_fast;
z->block[i].quant_shift = x->block[i].quant_shift;
z->block[i].zbin = x->block[i].zbin;
z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
@@ -421,7 +392,8 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
vp8_setup_block_ptrs(mb);
mb->activity_sum = 0;
mb->rddiv = cpi->RDDIV;
mb->rdmult = cpi->RDMULT;
mbd->left_context = &cm->left_context;
mb->mvc = cm->fc.mvc;

View File

@@ -472,7 +472,7 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
// Initial step/diamond search centred on best mv
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv);
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost);
if ( tmp_err < INT_MAX-new_mv_mode_penalty )
tmp_err += new_mv_mode_penalty;
@@ -495,7 +495,7 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
num00--;
else
{
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv);
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost);
if ( tmp_err < INT_MAX-new_mv_mode_penalty )
tmp_err += new_mv_mode_penalty;
@@ -1145,7 +1145,6 @@ void vp8_init_second_pass(VP8_COMP *cpi)
cpi->output_frame_rate = cpi->oxcf.frame_rate;
cpi->bits_left = (long long)(cpi->total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
cpi->bits_left -= (long long)(cpi->total_stats->duration * two_pass_min_rate / 10000000.0);
cpi->clip_bits_total = cpi->bits_left;
vp8_avg_stats(cpi->total_stats);
@@ -1174,25 +1173,17 @@ void vp8_init_second_pass(VP8_COMP *cpi)
{
start_pos = cpi->stats_in; // Note starting "file" position
cpi->modified_error_total = 0.0;
cpi->modified_error_used = 0.0;
cpi->modified_total_error_left = 0.0;
while (vp8_input_stats(cpi, &this_frame) != EOF)
{
cpi->modified_error_total += calculate_modified_err(cpi, &this_frame);
cpi->modified_total_error_left += calculate_modified_err(cpi, &this_frame);
}
cpi->modified_error_left = cpi->modified_error_total;
reset_fpf_position(cpi, start_pos); // Reset file position
}
// Calculate the clip target modified bits per error
// The observed bpe starts as the same number.
cpi->clip_bpe = cpi->bits_left /
DOUBLE_DIVIDE_CHECK(cpi->modified_error_total);
cpi->observed_bpe = cpi->clip_bpe;
cpi->fp_motion_map_stats = (unsigned char *)cpi->stats_in;
}
@@ -1448,7 +1439,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// Boost for arf frame
Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100);
Boost += (i * 50);
Boost += (cpi->baseline_gf_interval * 50);
allocation_chunks = (i * 100) + Boost;
// Normalize Altboost and allocation chunks down to prevent overflow
@@ -1594,9 +1585,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// Reset the file position
reset_fpf_position(cpi, start_pos);
// Update the record of error used so far (only done once per gf group)
cpi->modified_error_used += gf_group_err;
// Assign bits to the arf or gf.
{
int Boost;
@@ -1750,6 +1738,16 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
vp8_avg_stats(&sectionstats);
if (sectionstats.pcnt_motion < .17)
cpi->section_is_low_motion = 1;
else
cpi->section_is_low_motion = 0;
if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45)
cpi->section_is_fast_motion = 1;
else
cpi->section_is_fast_motion = 0;
cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
@@ -1894,16 +1892,6 @@ void vp8_second_pass(VP8_COMP *cpi)
// Is this a GF / ARF (Note that a KF is always also a GF)
if (cpi->frames_till_gf_update_due == 0)
{
// Update monitor of the bits per error observed so far.
// Done once per gf group based on what has gone before
// so do nothing if this is the first frame.
if (cpi->common.current_video_frame > 0)
{
cpi->observed_bpe =
(double)(cpi->clip_bits_total - cpi->bits_left) /
cpi->modified_error_used;
}
// Define next gf group and assign bits to it
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
define_gf_group(cpi, &this_frame_copy);
@@ -1992,14 +1980,7 @@ void vp8_second_pass(VP8_COMP *cpi)
cpi->ni_av_qi = cpi->worst_quality;
}
}
// The last few frames of a clip almost always have too few or too many
// bits, and for the sake of over-exact rate control we don't want to make
// radical adjustments to the allowed quantizer range just to use up a
// few surplus bits or get beneath the target rate.
else if ( (cpi->common.current_video_frame <
(((unsigned int)cpi->total_stats->count * 255)>>8)) &&
((cpi->common.current_video_frame + cpi->baseline_gf_interval) <
(unsigned int)cpi->total_stats->count) )
else
{
if (frames_left < 1)
frames_left = 1;
@@ -2218,7 +2199,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
// Calculate the number of bits that should be assigned to the kf group.
if ((cpi->bits_left > 0) && ((int)cpi->modified_error_left > 0))
if ((cpi->bits_left > 0) && ((int)cpi->modified_total_error_left > 0))
{
// Max for a single normal frame (not key frame)
int max_bits = frame_max_bits(cpi);
@@ -2230,7 +2211,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// complexity of the section
cpi->kf_group_bits = (long long)( cpi->bits_left *
( kf_group_err /
cpi->modified_error_left ));
cpi->modified_total_error_left ));
// Clip based on maximum per frame rate defined by the user.
max_grp_bits = (long long)max_bits * (long long)cpi->frames_to_key;
@@ -2363,7 +2344,17 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
vp8_avg_stats(&sectionstats);
cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
if (sectionstats.pcnt_motion < .17)
cpi->section_is_low_motion = 1;
else
cpi->section_is_low_motion = 0;
if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45)
cpi->section_is_fast_motion = 1;
else
cpi->section_is_fast_motion = 0;
cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
// if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) )
@@ -2483,7 +2474,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
double alt_kf_grp_bits =
((double)cpi->bits_left *
(kf_mod_err * (double)cpi->frames_to_key) /
DOUBLE_DIVIDE_CHECK(cpi->modified_error_left));
DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left));
alt_kf_bits = (int)((double)kf_boost *
(alt_kf_grp_bits / (double)allocation_chunks));
@@ -2501,7 +2492,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
alt_kf_bits =
(int)((double)cpi->bits_left *
(kf_mod_err /
DOUBLE_DIVIDE_CHECK(cpi->modified_error_left)));
DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left)));
if (alt_kf_bits > cpi->kf_bits)
{
@@ -2521,7 +2512,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// Adjust the count of total modified error left.
// The count of bits left is adjusted elsewhere based on real coded frame sizes
cpi->modified_error_left -= kf_group_err;
cpi->modified_total_error_left -= kf_group_err;
if (cpi->oxcf.allow_spatial_resampling)
{

View File

@@ -40,12 +40,6 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c;
cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c;
cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c;
cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c;
cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c;
cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c;
cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c;
cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c;
cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c;
cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c;
@@ -94,8 +88,6 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
cpi->rtcd.search.full_search = vp8_full_search_sad;
cpi->rtcd.search.diamond_search = vp8_diamond_search_sad;
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
#endif
// Pure C:

View File

@@ -913,8 +913,7 @@ int vp8_diamond_search_sad
int *num00,
vp8_variance_fn_ptr_t *fn_ptr,
int *mvsadcost[2],
int *mvcost[2],
MV *center_mv
int *mvcost[2]
)
{
int i, j, step;
@@ -941,8 +940,6 @@ int vp8_diamond_search_sad
unsigned char *check_here;
int thissad;
*num00 = 0;
// Work out the start point for the search
in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
best_address = in_what;
@@ -952,7 +949,7 @@ int vp8_diamond_search_sad
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
{
// Check the starting position
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
}
// search_param determines the length of the initial step and hence the number of iterations
@@ -964,6 +961,8 @@ int vp8_diamond_search_sad
best_mv->row = ref_row;
best_mv->col = ref_col;
*num00 = 0;
for (step = 0; step < tot_steps ; step++)
{
for (j = 0 ; j < x->searches_per_step ; j++)
@@ -983,7 +982,7 @@ int vp8_diamond_search_sad
{
this_mv.row = this_row_offset << 3;
this_mv.col = this_col_offset << 3;
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
if (thissad < bestsad)
{
@@ -1014,7 +1013,7 @@ int vp8_diamond_search_sad
return INT_MAX;
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
}
int vp8_diamond_search_sadx4
@@ -1029,8 +1028,7 @@ int vp8_diamond_search_sadx4
int *num00,
vp8_variance_fn_ptr_t *fn_ptr,
int *mvsadcost[2],
int *mvcost[2],
MV *center_mv
int *mvcost[2]
)
{
int i, j, step;
@@ -1057,8 +1055,6 @@ int vp8_diamond_search_sadx4
unsigned char *check_here;
unsigned int thissad;
*num00 = 0;
// Work out the start point for the search
in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
best_address = in_what;
@@ -1068,7 +1064,7 @@ int vp8_diamond_search_sadx4
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
{
// Check the starting position
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
}
// search_param determines the length of the initial step and hence the number of iterations
@@ -1080,6 +1076,8 @@ int vp8_diamond_search_sadx4
best_mv->row = ref_row;
best_mv->col = ref_col;
*num00 = 0;
for (step = 0; step < tot_steps ; step++)
{
int all_in = 1, t;
@@ -1110,7 +1108,7 @@ int vp8_diamond_search_sadx4
{
this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
sad_array[t] += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
if (sad_array[t] < bestsad)
{
@@ -1139,7 +1137,7 @@ int vp8_diamond_search_sadx4
{
this_mv.row = this_row_offset << 3;
this_mv.col = this_col_offset << 3;
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
if (thissad < bestsad)
{
@@ -1170,12 +1168,12 @@ int vp8_diamond_search_sadx4
return INT_MAX;
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
}
#if !(CONFIG_REALTIME_ONLY)
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
{
unsigned char *what = (*(b->base_src) + b->src);
int what_stride = b->src_stride;
@@ -1213,7 +1211,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
// Baseline value at the centre
//bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
}
// Apply further limits to prevent us from using vectors that stretch beyond the UMV border
@@ -1241,7 +1239,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
this_mv.col = c << 3;
//thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
//thissad += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
if (thissad < bestsad)
{
@@ -1260,12 +1258,12 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
if (bestsad < INT_MAX)
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
else
return INT_MAX;
}
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
{
unsigned char *what = (*(b->base_src) + b->src);
int what_stride = b->src_stride;
@@ -1303,7 +1301,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
{
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
}
// Apply further limits to prevent us from using vectors that stretch beyond the UMV border
@@ -1325,7 +1323,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
check_here = r * mv_stride + in_what + col_min;
c = col_min;
while ((c + 2) < col_max)
while ((c + 3) < col_max)
{
int i;
@@ -1338,7 +1336,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
if (thissad < bestsad)
{
this_mv.col = c << 3;
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
if (thissad < bestsad)
{
@@ -1361,7 +1359,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
if (thissad < bestsad)
{
this_mv.col = c << 3;
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
if (thissad < bestsad)
{
@@ -1383,165 +1381,13 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
if (bestsad < INT_MAX)
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
else
return INT_MAX;
}
#endif
// Exhaustive motion search over a rectangular window around ref_mv, scoring
// candidates in batches of eight (fn_ptr->sdx8f), then three (fn_ptr->sdx3f),
// then one at a time (fn_ptr->sdf).
//
// x             - supplies the mv_col_min/max and mv_row_min/max limits.
// b             - source block (base_src, src, src_stride).
// d             - prediction block (base_pre, pre, pre_stride); the winning
//                 row/col is written to d->bmi.mv.as_mv.
// ref_mv        - search centre; row/col are shifted right by 3 before use.
// error_per_bit - weight handed to vp8_mv_err_cost.
// distance      - half-width of the window: rows [ref_row - distance,
//                 ref_row + distance) and likewise for columns, before
//                 clamping to the limits in x.
// fn_ptr        - table providing sdf, sdx3f, sdx8f and vf.
// mvcost        - cost tables used for the returned (final) score.
// mvsadcost     - cost tables used while comparing candidates.
// center_mv     - reference vector passed to every vp8_mv_err_cost call.
//
// Returns fn_ptr->vf() at the best position plus the mvcost-based vector
// cost, or INT_MAX if bestsad never dropped below INT_MAX.
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
{
unsigned char *what = (*(b->base_src) + b->src);
int what_stride = b->src_stride;
unsigned char *in_what;
int in_what_stride = d->pre_stride;
int mv_stride = d->pre_stride;
unsigned char *bestaddress;
MV *best_mv = &d->bmi.mv.as_mv;
MV this_mv;
int bestsad = INT_MAX;
int r, c;
unsigned char *check_here;
// thissad is unsigned while bestsad is int, so the "thissad < bestsad"
// comparisons below are carried out in unsigned arithmetic.
unsigned int thissad;
int ref_row = ref_mv->row >> 3;
int ref_col = ref_mv->col >> 3;
int row_min = ref_row - distance;
int row_max = ref_row + distance;
int col_min = ref_col - distance;
int col_max = ref_col + distance;
// Result buffers for the batched SAD calls: sdx8f fills eight 16-bit
// entries, sdx3f fills three unsigned int entries.
unsigned short sad_array8[8];
unsigned int sad_array[3];
// Work out the mid point for the search
in_what = *(d->base_pre) + d->pre;
bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
best_mv->row = ref_row;
best_mv->col = ref_col;
// We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
// (strictly inside; otherwise bestsad keeps its INT_MAX initial value)
if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
{
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
}
// Apply further limits to prevent us from using vectors that stretch beyond the UMV border
if (col_min < x->mv_col_min)
col_min = x->mv_col_min;
if (col_max > x->mv_col_max)
col_max = x->mv_col_max;
if (row_min < x->mv_row_min)
row_min = x->mv_row_min;
if (row_max > x->mv_row_max)
row_max = x->mv_row_max;
// Scan the window row by row; row_max and col_max are exclusive bounds.
for (r = row_min; r < row_max ; r++)
{
this_mv.row = r << 3;
check_here = r * mv_stride + in_what + col_min;
c = col_min;
// Stage 1: while at least eight columns remain, score eight adjacent
// positions with a single sdx8f call.
while ((c + 7) < col_max)
{
int i;
fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8);
for (i = 0; i < 8; i++)
{
thissad = (unsigned int)sad_array8[i];
// Reject on the raw SAD first; the vector cost is only added (and the
// comparison repeated) for candidates that pass.
if (thissad < bestsad)
{
this_mv.col = c << 3;
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
if (thissad < bestsad)
{
bestsad = thissad;
best_mv->row = r;
best_mv->col = c;
bestaddress = check_here;
}
}
// Advance to the next column whether or not this candidate won.
check_here++;
c++;
}
}
// Stage 2: while at least three columns remain, score three adjacent
// positions with a single sdx3f call.
while ((c + 2) < col_max)
{
int i;
fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
for (i = 0; i < 3; i++)
{
thissad = sad_array[i];
if (thissad < bestsad)
{
this_mv.col = c << 3;
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
if (thissad < bestsad)
{
bestsad = thissad;
best_mv->row = r;
best_mv->col = c;
bestaddress = check_here;
}
}
check_here++;
c++;
}
}
// Stage 3: remaining columns of this row, one position per sdf call.
// bestsad is passed as the final sdf argument.
// NOTE(review): presumably an early-exit threshold - confirm against sdf.
while (c < col_max)
{
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
if (thissad < bestsad)
{
this_mv.col = c << 3;
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
if (thissad < bestsad)
{
bestsad = thissad;
best_mv->row = r;
best_mv->col = c;
bestaddress = check_here;
}
}
check_here ++;
c ++;
}
}
// Rebuild the winning vector with the same << 3 scaling used for candidates.
this_mv.row = best_mv->row << 3;
this_mv.col = best_mv->col << 3;
// The returned score uses vf and mvcost rather than the SAD and mvsadcost
// used during the scan. thissad only serves as vf's last argument here.
// NOTE(review): presumably an output (e.g. SSE) that is discarded - confirm.
if (bestsad < INT_MAX)
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
else
return INT_MAX;
}
#ifdef ENTROPY_STATS
void print_mode_context(void)
{

View File

@@ -24,7 +24,7 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step
#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS+3)) - 8) // Max full pel mv specified in 1/8 pel units
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units
#define MAX_POSSIBLE_MV (1 << 11) // Maximum MV in 1/8 pel units
extern void print_mode_context(void);
extern int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight);
@@ -67,8 +67,7 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
int distance, \
vp8_variance_fn_ptr_t *fn_ptr, \
int *mvcost[2], \
int *mvsadcost[2], \
MV *center_mv \
int *mvsadcost[2] \
)
#define prototype_diamond_search_sad(sym)\
@@ -84,8 +83,7 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
int *num00, \
vp8_variance_fn_ptr_t *fn_ptr, \
int *mvsadcost[2], \
int *mvcost[2], \
MV *center_mv \
int *mvcost[2] \
)
#if ARCH_X86 || ARCH_X86_64
@@ -95,7 +93,6 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
typedef prototype_full_search_sad(*vp8_full_search_fn_t);
extern prototype_full_search_sad(vp8_full_search_sad);
extern prototype_full_search_sad(vp8_full_search_sadx3);
extern prototype_full_search_sad(vp8_full_search_sadx8);
typedef prototype_diamond_search_sad(*vp8_diamond_search_fn_t);
extern prototype_diamond_search_sad(vp8_diamond_search_sad);

View File

@@ -73,7 +73,6 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi);
int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi);
static void set_default_lf_deltas(VP8_COMP *cpi);
@@ -175,6 +174,17 @@ static const int kf_high_motion_minq[QINDEX_RANGE] =
27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34,
35,35,36,36,37,38,39,40,41,42,43,44,45,46,47,48,
};
/*static const int kf_minq[QINDEX_RANGE] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10,10,11,11,12,12,13,13,14,14,
15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22,
23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30,
31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,38
};*/
static const int gf_low_motion_minq[QINDEX_RANGE] =
{
0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,
@@ -208,16 +218,27 @@ static const int gf_high_motion_minq[QINDEX_RANGE] =
41,41,42,42,43,44,45,46,47,48,49,50,51,52,53,54,
55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80,
};
/*static const int gf_arf_minq[QINDEX_RANGE] =
{
0,0,0,0,1,1,1,1,1,1,2,2,3,3,3,4,
4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,
9,10,10,10,11,11,11,12,12,12,13,13,13,14,14,14,
15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22,
23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30,
31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,39,
39,40,40,41,41,42,42,43,43,44,45,46,47,48,49,50,
51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66
};*/
static const int inter_minq[QINDEX_RANGE] =
{
0,0,1,1,2,3,3,4,4,5,6,6,7,8,8,9,
9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,20,
20,21,22,22,23,24,24,25,26,27,27,28,29,30,30,31,
32,33,33,34,35,36,36,37,38,39,39,40,41,42,42,43,
44,45,46,46,47,48,49,50,50,51,52,53,54,55,55,56,
57,58,59,60,60,61,62,63,64,65,66,67,67,68,69,70,
71,72,73,74,75,75,76,77,78,79,80,81,82,83,84,85,
86,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
0,0,0,0,1,1,2,3,3,4,4,5,6,6,7,7,
8,8,9,9,10,11,11,12,12,13,13,14,14,15,15,16,
16,17,17,17,18,18,19,19,20,20,21,21,22,22,22,23,
23,24,24,24,25,25,26,27,28,28,29,30,31,32,33,34,
35,35,36,37,38,39,39,40,41,42,43,43,44,45,46,47,
47,48,49,49,51,52,53,54,54,55,56,56,57,57,58,58,
59,59,60,61,61,62,62,63,64,64,65,66,67,67,68,69,
69,70,71,71,72,73,74,75,76,76,77,78,79,80,81,81,
};
void vp8_initialize()
@@ -262,21 +283,6 @@ static void setup_features(VP8_COMP *cpi)
void vp8_dealloc_compressor_data(VP8_COMP *cpi)
{
// Delete last frame MV storage buffers
if (cpi->lfmv != 0)
vpx_free(cpi->lfmv);
cpi->lfmv = 0;
if (cpi->lf_ref_frame_sign_bias != 0)
vpx_free(cpi->lf_ref_frame_sign_bias);
cpi->lf_ref_frame_sign_bias = 0;
if (cpi->lf_ref_frame != 0)
vpx_free(cpi->lf_ref_frame);
cpi->lf_ref_frame = 0;
// Delete segmentation map
if (cpi->segmentation_map != 0)
@@ -325,15 +331,8 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
cpi->mb.pip = 0;
if(cpi->total_stats)
vpx_free(cpi->total_stats);
cpi->total_stats = 0;
if(cpi->this_frame_stats)
vpx_free(cpi->this_frame_stats);
cpi->this_frame_stats = 0;
vpx_free(cpi->total_stats);
vpx_free(cpi->this_frame_stats);
}
static void enable_segmentation(VP8_PTR ptr)
@@ -564,7 +563,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
int Speed = cpi->Speed;
int i;
VP8_COMMON *cm = &cpi->common;
int last_improved_quant = sf->improved_quant;
// Initialise default mode frequency sampling variables
for (i = 0; i < MAX_MODES; i ++)
@@ -591,7 +589,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->max_fs_radius = 32;
sf->iterative_sub_pixel = 1;
sf->optimize_coefficients = 1;
sf->use_fastquant_for_pick = 0;
sf->first_step = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
@@ -685,32 +682,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_NEARG ] = 1000;
sf->thresh_mult[THR_NEARA ] = 1000;
#if 1
sf->thresh_mult[THR_ZEROMV ] = 0;
sf->thresh_mult[THR_ZEROG ] = 0;
sf->thresh_mult[THR_ZEROA ] = 0;
sf->thresh_mult[THR_NEARESTMV] = 0;
sf->thresh_mult[THR_NEARESTG ] = 0;
sf->thresh_mult[THR_NEARESTA ] = 0;
sf->thresh_mult[THR_NEARMV ] = 0;
sf->thresh_mult[THR_NEARG ] = 0;
sf->thresh_mult[THR_NEARA ] = 0;
// sf->thresh_mult[THR_DC ] = 0;
// sf->thresh_mult[THR_V_PRED ] = 1000;
// sf->thresh_mult[THR_H_PRED ] = 1000;
// sf->thresh_mult[THR_B_PRED ] = 2000;
// sf->thresh_mult[THR_TM ] = 1000;
sf->thresh_mult[THR_NEWMV ] = 1000;
sf->thresh_mult[THR_NEWG ] = 1000;
sf->thresh_mult[THR_NEWA ] = 1000;
sf->thresh_mult[THR_SPLITMV ] = 1700;
sf->thresh_mult[THR_SPLITG ] = 4500;
sf->thresh_mult[THR_SPLITA ] = 4500;
#else
sf->thresh_mult[THR_NEWMV ] = 1500;
sf->thresh_mult[THR_NEWG ] = 1500;
sf->thresh_mult[THR_NEWA ] = 1500;
@@ -718,7 +689,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_SPLITMV ] = 5000;
sf->thresh_mult[THR_SPLITG ] = 10000;
sf->thresh_mult[THR_SPLITA ] = 10000;
#endif
sf->full_freq[0] = 15;
sf->full_freq[1] = 31;
@@ -790,7 +761,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_SPLITA ] = 20000;
}
sf->use_fastquant_for_pick = 1;
sf->improved_quant = 0;
sf->improved_dct = 0;
sf->first_step = 1;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
@@ -798,8 +770,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
if (Speed > 1)
{
sf->use_fastquant_for_pick = 0;
cpi->mode_check_freq[THR_SPLITG] = 15;
cpi->mode_check_freq[THR_SPLITA] = 15;
cpi->mode_check_freq[THR_SPLITMV] = 7;
@@ -833,13 +803,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
sf->thresh_mult[THR_SPLITA ] = 50000;
}
sf->first_step = 1;
sf->improved_quant = 0;
sf->improved_dct = 0;
// Only do recode loop on key frames, golden frames and
// alt ref frames
// Only do recode loop on key frames and golden frames
sf->recode_loop = 2;
sf->full_freq[0] = 31;
@@ -1298,8 +1262,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
{
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
}
if (cpi->sf.improved_quant != last_improved_quant)
vp8cx_init_quantizer(cpi);
#if CONFIG_RUNTIME_CPU_DETECT
cpi->mb.e_mbd.rtcd = &cpi->common.rtcd;
@@ -1367,9 +1329,6 @@ static void alloc_raw_frame_buffers(VP8_COMP *cpi)
static int vp8_alloc_partition_data(VP8_COMP *cpi)
{
if(cpi->mb.pip)
vpx_free(cpi->mb.pip);
cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) *
(cpi->common.mb_rows + 1),
sizeof(PARTITION_INFO));
@@ -1437,16 +1396,8 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
if(cpi->total_stats)
vpx_free(cpi->total_stats);
cpi->total_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
if(cpi->this_frame_stats)
vpx_free(cpi->this_frame_stats);
cpi->this_frame_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
if(!cpi->total_stats || !cpi->this_frame_stats)
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate firstpass stats");
@@ -2194,10 +2145,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->alt_is_last = 0 ;
cpi->gold_is_alt = 0 ;
// allocate memory for storing last frame's MVs for MV prediction.
CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int_mv)));
CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int)));
CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int)));
// Create the encoder segmentation map and set all entries to 0
CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
@@ -2253,8 +2201,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
init_context_counters();
#endif
/*Initialize the feed-forward activity masking.*/
cpi->activity_avg = 90<<12;
cpi->frames_since_key = 8; // Give a sensible default for the first frame.
cpi->key_frame_frequency = cpi->oxcf.key_freq;
@@ -2395,7 +2341,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);
cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
cpi->fn_ptr[BLOCK_16X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8);
cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
@@ -2405,7 +2350,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
cpi->fn_ptr[BLOCK_16X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8);
cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
@@ -2415,7 +2359,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
cpi->fn_ptr[BLOCK_8X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8);
cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
@@ -2425,7 +2368,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
cpi->fn_ptr[BLOCK_8X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8);
cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
@@ -2435,7 +2377,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8);
cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
#if !(CONFIG_REALTIME_ONLY)
@@ -3486,37 +3427,6 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
#endif
// return of 0 means drop frame
// Decides whether the frame just encoded should be sent back through the
// encode loop and recoded.
//
// Returns TRUE only when recoding is permitted for this frame AND the
// projected frame size lies outside [low_limit, high_limit] with q still
// strictly inside the corresponding end of its (minq, maxq) range;
// returns FALSE otherwise.
static BOOL recode_loop_test( VP8_COMP *cpi,
                              int high_limit, int low_limit,
                              int q, int maxq, int minq )
{
    const VP8_COMMON *common = &cpi->common;

    // Recoding is permitted for every frame when recode mode 1 is selected,
    // and in mode 2 only for a key frame, golden frame or alt ref frame.
    // Any other mode never recodes.
    if (cpi->sf.recode_loop != 1)
    {
        if (cpi->sf.recode_loop != 2)
            return FALSE;

        if ((common->frame_type != KEY_FRAME) &&
            !common->refresh_golden_frame &&
            !common->refresh_alt_ref_frame)
            return FALSE;
    }

    // General overshoot test: projected size above high_limit while q is
    // still below maxq.
    if ((cpi->projected_frame_size > high_limit) && (q < maxq))
        return TRUE;

    // General undershoot test: projected size below low_limit while q is
    // still above minq.
    if ((cpi->projected_frame_size < low_limit) && (q > minq))
        return TRUE;

    // Specific rate control mode related tests
    // TBD

    return FALSE;
}
static void encode_frame_to_data_rate
(
VP8_COMP *cpi,
@@ -3579,18 +3489,8 @@ static void encode_frame_to_data_rate
cpi->zbin_over_quant = 0;
cpi->zbin_mode_boost = 0;
// Enable or disable mode based tweaking of the zbin
// For 2 Pass Only used where GF/ARF prediction quality
// is above a threshold
cpi->zbin_mode_boost = 0;
// Enable mode based tweaking of the zbin
cpi->zbin_mode_boost_enabled = TRUE;
if (cpi->pass == 2)
{
if ( cpi->gfu_boost <= 400 )
{
cpi->zbin_mode_boost_enabled = FALSE;
}
}
// Current default encoder behaviour for the altref sign bias
if (cpi->source_alt_ref_active)
@@ -3871,16 +3771,17 @@ static void encode_frame_to_data_rate
vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit);
// Limit Q range for the adaptive loop.
// Limit Q range for the adaptive loop (Values not clipped to range 20-60 as in VP8).
bottom_index = cpi->active_best_quality;
top_index = cpi->active_worst_quality;
q_low = cpi->active_best_quality;
q_high = cpi->active_worst_quality;
vp8_save_coding_context(cpi);
loop_count = 0;
q_low = cpi->best_quality;
q_high = cpi->worst_quality;
scale_and_extend_source(cpi->un_scaled_source, cpi);
#if !(CONFIG_REALTIME_ONLY) && CONFIG_POSTPROC
@@ -3916,6 +3817,7 @@ static void encode_frame_to_data_rate
if (cm->frame_type == KEY_FRAME)
{
vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0, RTCD(postproc));
cpi->ppi.frame = 0;
}
else
{
@@ -3927,6 +3829,10 @@ static void encode_frame_to_data_rate
{
src += cpi->Source->y_stride * (cpi->Source->y_height - 1);
}
//temp_filter(&cpi->ppi,src,src,
// cm->last_frame.y_width * cm->last_frame.y_height,
// cpi->oxcf.noise_sensitivity);
}
}
@@ -4057,13 +3963,15 @@ static void encode_frame_to_data_rate
Q = vp8_regulate_q(cpi, cpi->this_frame_target);
q_low = cpi->best_quality;
q_high = cpi->worst_quality;
vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit);
// Limit Q range for the adaptive loop.
// Limit Q range for the adaptive loop (Values not clipped to range 20-60 as in VP8).
bottom_index = cpi->active_best_quality;
top_index = cpi->active_worst_quality;
q_low = cpi->active_best_quality;
q_high = cpi->active_worst_quality;
loop_count++;
Loop = TRUE;
@@ -4103,18 +4011,19 @@ static void encode_frame_to_data_rate
#if !(CONFIG_REALTIME_ONLY)
// Is the projected frame size out of range and are we allowed to attempt to recode.
if ( recode_loop_test( cpi,
frame_over_shoot_limit, frame_under_shoot_limit,
Q, top_index, bottom_index ) )
if (((cpi->sf.recode_loop == 1) ||
((cpi->sf.recode_loop == 2) && (cm->refresh_golden_frame || (cm->frame_type == KEY_FRAME)))) &&
(((cpi->projected_frame_size > frame_over_shoot_limit) && (Q < top_index)) ||
//((cpi->projected_frame_size > frame_over_shoot_limit ) && (Q == top_index) && (cpi->zbin_over_quant < ZBIN_OQ_MAX)) ||
((cpi->projected_frame_size < frame_under_shoot_limit) && (Q > bottom_index)))
)
{
int last_q = Q;
int Retries = 0;
// Frame size out of permitted range:
// Update correction factor & compute new Q to try...
// Frame is too large
if (cpi->projected_frame_size > cpi->this_frame_target)
if (cpi->projected_frame_size > frame_over_shoot_limit)
{
//if ( cpi->zbin_over_quant == 0 )
q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value
@@ -4158,7 +4067,6 @@ static void encode_frame_to_data_rate
overshoot_seen = TRUE;
}
// Frame is too small
else
{
if (cpi->zbin_over_quant == 0)
@@ -4252,36 +4160,6 @@ static void encode_frame_to_data_rate
}
#endif
// Update the GF useage maps.
// This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
// This frame's MVs are saved and will be used in next frame's MV prediction.
if(cm->show_frame) //do not save for altref frame
{
int mb_row;
int mb_col;
MODE_INFO *tmp = cm->mip; //point to beginning of allocated MODE_INFO arrays.
//static int last_video_frame = 0;
if(cm->frame_type != KEY_FRAME)
{
for (mb_row = 0; mb_row < cm->mb_rows+1; mb_row ++)
{
for (mb_col = 0; mb_col < cm->mb_cols+1; mb_col ++)
{
if(tmp->mbmi.ref_frame != INTRA_FRAME)
cpi->lfmv[mb_col + mb_row*(cm->mode_info_stride)].as_int = tmp->mbmi.mv.as_int;
cpi->lf_ref_frame_sign_bias[mb_col + mb_row*(cm->mode_info_stride)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame];
cpi->lf_ref_frame[mb_col + mb_row*(cm->mode_info_stride)] = tmp->mbmi.ref_frame;
tmp++;
}
}
}
}
// Update the GF useage maps.
// This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
@@ -4340,11 +4218,10 @@ static void encode_frame_to_data_rate
{
vp8cx_set_alt_lf_level(cpi, cm->filter_level);
vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level);
cm->last_frame_type = cm->frame_type;
cm->last_filter_type = cm->filter_type;
cm->last_sharpness_level = cm->sharpness_level;
}
/* Move storing frame_type out of the above loop since it is also needed in motion search besides loopfilter */
cm->last_frame_type = cm->frame_type;
vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
@@ -4656,7 +4533,7 @@ static void encode_frame_to_data_rate
}
else
{
if (cpi->oxcf.play_alternate && cpi->common.refresh_alt_ref_frame && (cpi->common.frame_type != KEY_FRAME))
if (cpi->oxcf.play_alternate && cpi->common.refresh_alt_ref_frame)
// Update the alternate reference frame and stats as appropriate.
update_alt_ref_frame_and_stats(cpi);
else
@@ -4979,7 +4856,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
{
int thiserr;
cpi->oxcf.arnr_strength = i;
vp8_temporal_filter_prepare_c(cpi);
vp8cx_temp_filter_c(cpi);
thiserr = vp8_calc_low_ss_err(&cpi->alt_ref_buffer.source_buffer,
&cpi->src_buffer[start_frame].source_buffer, IF_RTCD(&cpi->rtcd.variance));
@@ -4994,7 +4871,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
if (besti != -1)
{
cpi->oxcf.arnr_strength = besti;
vp8_temporal_filter_prepare_c(cpi);
vp8cx_temp_filter_c(cpi);
s = &cpi->alt_ref_buffer;
// FWG not sure if I need to copy this data for the Alt Ref frame
@@ -5006,7 +4883,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
s = &cpi->src_buffer[cpi->last_alt_ref_sei];
#else
vp8_temporal_filter_prepare_c(cpi);
vp8cx_temp_filter_c(cpi);
s = &cpi->alt_ref_buffer;
// FWG not sure if I need to copy this data for the Alt Ref frame
@@ -5090,16 +4967,17 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
*frame_flags = cpi->source_frame_flags;
#if CONFIG_PSNR
if (cpi->source_time_stamp < cpi->first_time_stamp_ever)
{
cpi->first_time_stamp_ever = cpi->source_time_stamp;
cpi->last_end_time_stamp_seen = cpi->source_time_stamp;
}
#endif
// adjust frame rates based on timestamps given
if (!cm->refresh_alt_ref_frame)
{
if (cpi->source_time_stamp == cpi->first_time_stamp_ever)
if (cpi->last_time_stamp_seen == 0)
{
double this_fps = 10000000.000 / (cpi->source_end_time_stamp - cpi->source_time_stamp);
@@ -5107,8 +4985,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
}
else
{
long long nanosecs = cpi->source_end_time_stamp
- cpi->last_end_time_stamp_seen;
long long nanosecs = cpi->source_time_stamp - cpi->last_time_stamp_seen;
double this_fps = 10000000.000 / nanosecs;
vp8_new_frame_rate(cpi, (7 * cpi->oxcf.frame_rate + this_fps) / 8);
@@ -5116,7 +4993,6 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
}
cpi->last_time_stamp_seen = cpi->source_time_stamp;
cpi->last_end_time_stamp_seen = cpi->source_end_time_stamp;
}
if (cpi->compressor_speed == 2)
@@ -5332,7 +5208,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
return 0;
}
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags)
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
{
VP8_COMP *cpi = (VP8_COMP *) comp;
@@ -5342,7 +5218,7 @@ int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflag
{
int ret;
#if CONFIG_POSTPROC
ret = vp8_post_proc_frame(&cpi->common, dest, flags);
ret = vp8_post_proc_frame(&cpi->common, dest, deblock_level, noise_level, flags);
#else
if (cpi->common.frame_to_show)
@@ -5435,12 +5311,12 @@ int vp8_set_internal_size(VP8_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert
{
VP8_COMP *cpi = (VP8_COMP *) comp;
if (horiz_mode <= ONETWO)
if (horiz_mode >= NORMAL && horiz_mode <= ONETWO)
cpi->common.horiz_scale = horiz_mode;
else
return -1;
if (vert_mode <= ONETWO)
if (vert_mode >= NORMAL && vert_mode <= ONETWO)
cpi->common.vert_scale = vert_mode;
else
return -1;

View File

@@ -18,6 +18,7 @@
#include "treewriter.h"
#include "tokenize.h"
#include "onyxc_int.h"
#include "preproc.h"
#include "variance.h"
#include "dct.h"
#include "encodemb.h"
@@ -27,7 +28,6 @@
#include "vpx_ports/mem.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "mcomp.h"
#include "temporal_filter.h"
//#define SPEEDSTATS 1
#define MIN_GF_INTERVAL 4
@@ -46,8 +46,6 @@
#define MAX_THRESHMULT 512
#define GF_ZEROMV_ZBIN_BOOST 24
#define LF_ZEROMV_ZBIN_BOOST 12
#define MV_ZBIN_BOOST 4
#define ZBIN_OQ_MAX 192
#define VP8_TEMPORAL_ALT_REF 1
@@ -182,8 +180,6 @@ typedef struct
int first_step;
int optimize_coefficients;
int use_fastquant_for_pick;
} SPEED_FEATURES;
typedef struct
@@ -231,7 +227,6 @@ typedef struct VP8_ENCODER_RTCD
vp8_encodemb_rtcd_vtable_t encodemb;
vp8_quantize_rtcd_vtable_t quantize;
vp8_search_rtcd_vtable_t search;
vp8_temporal_rtcd_vtable_t temporal;
} VP8_ENCODER_RTCD;
enum
@@ -244,12 +239,6 @@ enum
BLOCK_MAX_SEGMENTS
};
typedef union
{
unsigned int as_int;
MV as_mv;
} int_mv; /* facilitates rapid equality tests */
typedef struct
{
@@ -271,9 +260,6 @@ typedef struct
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y1quant_fast[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y2quant_fast[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVquant_fast[QINDEX_RANGE][16]);
MACROBLOCK mb;
@@ -290,14 +276,14 @@ typedef struct
unsigned int source_frame_flags;
YV12_BUFFER_CONFIG scaled_source;
int source_buffer_count; // number of src_buffers in use for lagged encoding
int source_encode_index; // index of buffer in src_buffer to encode
int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref
int source_alt_ref_active; // an alt ref frame has been encoded and is usable
int source_buffer_count;
int source_encode_index;
int source_alt_ref_pending;
int source_alt_ref_active;
int last_alt_ref_sei; // index into src_buffers of frame used as alt reference
int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame
int is_next_src_alt_ref; // source of next frame to encode is an exact copy of an alt ref frame
int last_alt_ref_sei;
int is_src_frame_alt_ref;
int is_next_src_alt_ref;
int gold_is_last; // golden frame same as last frame ( short circuit gold searches)
int alt_is_last; // Alt reference frame same as last ( short circuit altref search)
@@ -333,7 +319,6 @@ typedef struct
int mvcostmultiplier;
int subseqblockweight;
int errthresh;
unsigned int activity_avg;
int RDMULT;
int RDDIV ;
@@ -414,7 +399,6 @@ typedef struct
int inter_frame_target;
double output_frame_rate;
long long last_time_stamp_seen;
long long last_end_time_stamp_seen;
long long first_time_stamp_ever;
int ni_av_qi;
@@ -470,6 +454,8 @@ typedef struct
unsigned char *output_partition2;
size_t output_partition2size;
pre_proc_instance ppi;
int frames_to_key;
int gfu_boost;
int kf_boost;
@@ -480,17 +466,11 @@ typedef struct
double start_tot_err_left;
double min_error;
double modified_error_total;
double modified_error_used;
double modified_error_left;
double clip_bpe;
double observed_bpe;
double modified_total_error_left;
double avg_iiratio;
int target_bandwidth;
long long bits_left;
long long clip_bits_total;
FIRSTPASS_STATS *total_stats;
FIRSTPASS_STATS *this_frame_stats;
FIRSTPASS_STATS *stats_in, *stats_in_end;
@@ -631,6 +611,9 @@ typedef struct
unsigned int tempdata2;
int base_skip_false_prob[128];
unsigned int section_is_low_motion;
unsigned int section_benefits_from_aggresive_q;
unsigned int section_is_fast_motion;
unsigned int section_intra_rating;
double section_max_qfactor;
@@ -678,10 +661,6 @@ typedef struct
unsigned char *gf_active_flags; // Record of which MBs still refer to last golden frame either directly or through 0,0
int gf_active_count;
//Store last frame's MV info for next frame MV prediction
int_mv *lfmv;
int *lf_ref_frame_sign_bias;
int *lf_ref_frame;
} VP8_COMP;
@@ -691,8 +670,6 @@ void vp8_encode_frame(VP8_COMP *cpi);
void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size);
unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
int rd_cost_intra_mb(MACROBLOCKD *x);
void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **);

View File

@@ -685,7 +685,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
#if 0
// Initial step Search
bestsme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, cpi->mb.mvcost, &best_ref_mv1);
bestsme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, cpi->mb.mvcost);
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -698,7 +698,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
num00--;
else
{
thissme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, x->mvcost, &best_ref_mv1);
thissme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, x->mvcost);
if (thissme < bestsme)
{
@@ -724,7 +724,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
}
else
{
bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv1); //sadpb < 9
bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -743,7 +743,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
num00--;
else
{
thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv1); //sadpb = 9
thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
if (thissme < bestsme)
{

251
vp8/encoder/preproc.c Normal file
View File

@@ -0,0 +1,251 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/****************************************************************************
*
* Module Title : preproc.c
*
* Description : Simple pre-processor.
*
****************************************************************************/
/****************************************************************************
* Header Files
****************************************************************************/
#include "memory.h"
#include "preproc7.h"
#include "vpx_mem/vpx_mem.h"
/****************************************************************************
* Macros
****************************************************************************/
#define FRAMECOUNT 7
/* Rounds X (a pointer or integer) up to the next multiple of 32 and yields it
 * as an unsigned long.  The mask is built at the full width of unsigned long;
 * a 32-bit literal mask would zero-extend and clear the upper half of a
 * 64-bit address. */
#define ROUNDUP32(X) ( ( ( (unsigned long) (X) ) + 31 ) & ~( (unsigned long) 31 ) )
/****************************************************************************
* Imports
****************************************************************************/
extern void vp8_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
/****************************************************************************
* Exported Global Variables
****************************************************************************/
void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
void temp_filter_mmx
(
pre_proc_instance *ppi,
unsigned char *s,
unsigned char *d,
int bytes,
int strength
);
void temp_filter_wmt
(
pre_proc_instance *ppi,
unsigned char *s,
unsigned char *d,
int bytes,
int strength
);
/****************************************************************************
 *
 *  ROUTINE       : temp_filter_c
 *
 *  INPUTS        : pre_proc_instance *ppi : Pointer to pre-processor instance.
 *                  unsigned char *s       : Pointer to source frame.
 *                  unsigned char *d       : Pointer to destination frame.
 *                  int bytes              : Number of bytes to filter.
 *                  int strength           : Strength of filter to apply
 *                                           (used as a right-shift count).
 *
 *  OUTPUTS       : d[0..bytes-1] receives the filtered frame;
 *                  ppi->frame_buffer and ppi->frame are updated.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Performs a closeness adjusted temporal blur.
 *
 *                  ppi->frame_buffer keeps FRAMECOUNT samples for every byte
 *                  position, stored consecutively.  On the first call
 *                  (ppi->frame == 0) every slot is primed with the source
 *                  value and the source is copied to the destination.  On
 *                  later calls the current sample overwrites slot
 *                  (ppi->frame % FRAMECOUNT) and the output is a weighted
 *                  average of all FRAMECOUNT stored samples, where a sample's
 *                  weight falls from 16 towards 0 as its squared difference
 *                  from the current source value grows.
 *
 *  SPECIAL NOTES : Destination frame can be same as source frame.
 *                  Nothing is read or written when bytes <= 0; the frame
 *                  counter still advances on every call.
 *
 ****************************************************************************/
void temp_filter_c
(
    pre_proc_instance *ppi,
    unsigned char *s,
    unsigned char *d,
    int bytes,
    int strength
)
{
    int byte;
    unsigned char *frameptr = ppi->frame_buffer;

    if (ppi->frame == 0)
    {
        /* First frame: seed the whole history with the source pixels. */
        for (byte = 0; byte < bytes; ++byte)
        {
            int frame;

            for (frame = 0; frame < FRAMECOUNT; ++frame)
                *frameptr++ = s[byte];

            d[byte] = s[byte];
        }
    }
    else
    {
        /* History slot that the current frame replaces. */
        int offset = (ppi->frame % FRAMECOUNT);

        for (byte = 0; byte < bytes; ++byte)
        {
            int accumulator = 0;
            int count = 0;
            int frame;

            frameptr[offset] = s[byte];

            for (frame = 0; frame < FRAMECOUNT; ++frame)
            {
                int pixel_value = *frameptr++;
                int modifier = s[byte] - pixel_value;

                /* weight = 16 - min(16, 3 * (diff^2 >> strength)) */
                modifier *= modifier;
                modifier >>= strength;
                modifier *= 3;

                if (modifier > 16)
                    modifier = 16;

                modifier = 16 - modifier;

                accumulator += modifier * pixel_value;
                count += modifier;
            }

            /* The slot just written equals the source, so it always
             * contributes weight 16: count is in [16, 16 * FRAMECOUNT]. */
            accumulator += (count >> 1);                 /* round to nearest */
            accumulator *= ppi->fixed_divide[count];     /* 0x10000 / count */
            accumulator >>= 16;

            d[byte] = accumulator;
        }
    }

    ++ppi->frame;
}
/****************************************************************************
 *
 *  ROUTINE       : delete_pre_proc
 *
 *  INPUTS        : pre_proc_instance *ppi : Pointer to pre-processor instance.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Releases the frame history buffer and the fixed-divide
 *                  table owned by a pre-processing instance and zeroes the
 *                  four pointers that referred to them.
 *
 *  SPECIAL NOTES : Only non-zero allocation pointers are freed and all are
 *                  zeroed afterwards, so calling this again on the same
 *                  instance does nothing further.
 *
 ****************************************************************************/
void delete_pre_proc(pre_proc_instance *ppi)
{
    /* Frame history: free the raw allocation, then drop both the raw and
     * the aligned pointer. */
    if (ppi->frame_buffer_alloc != 0)
    {
        vpx_free(ppi->frame_buffer_alloc);
    }

    ppi->frame_buffer = 0;
    ppi->frame_buffer_alloc = 0;

    /* Reciprocal table: same treatment. */
    if (ppi->fixed_divide_alloc != 0)
    {
        vpx_free(ppi->fixed_divide_alloc);
    }

    ppi->fixed_divide = 0;
    ppi->fixed_divide_alloc = 0;
}
/****************************************************************************
 *
 *  ROUTINE       : init_pre_proc7
 *
 *  INPUTS        : pre_proc_instance *ppi : Pointer to pre-processor instance.
 *                  int frame_size         : Number of bytes in one frame.
 *
 *  OUTPUTS       : ppi->frame_buffer / ppi->fixed_divide (and their *_alloc
 *                  counterparts) are (re)allocated; the global temp_filter
 *                  function pointer is set.
 *
 *  RETURNS       : int: 1 if successful, 0 if failed.
 *
 *  FUNCTION      : Initializes a preprocessor instance: selects the
 *                  temp_filter implementation from the processor flags,
 *                  allocates a 32-byte aligned history buffer holding
 *                  FRAMECOUNT samples per frame byte, and builds the
 *                  0x10000 / i reciprocal table.
 *
 *  SPECIAL NOTES : Any buffers already attached to ppi are released first via
 *                  delete_pre_proc(), so the *_alloc pointers must be either
 *                  valid allocations or zero on entry.  On failure the
 *                  instance is left with all four pointers zeroed.
 *
 ****************************************************************************/
int init_pre_proc7(pre_proc_instance *ppi, int frame_size)
{
    int i;
    int mmx_enabled;
    int xmm_enabled;
    int wmt_enabled;

    vp8_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);

    /* Pick the most capable implementation the CPU reports. */
    if (wmt_enabled)
        temp_filter = temp_filter_wmt;
    else if (mmx_enabled)
        temp_filter = temp_filter_mmx;
    else
        temp_filter = temp_filter_c;

    delete_pre_proc(ppi);

    /* A negative size would turn into a tiny or wrapped allocation request
     * that the filter would then overrun. */
    if (frame_size < 0)
        return 0;

    /* sizeof comes first so the product is computed at the width of the
     * allocation size rather than in int; 32 extra bytes leave room for the
     * alignment round-up below. */
    ppi->frame_buffer_alloc = vpx_malloc(32 + sizeof(unsigned char) * frame_size * FRAMECOUNT);

    if (!ppi->frame_buffer_alloc)
    {
        delete_pre_proc(ppi);
        return 0;
    }

    ppi->frame_buffer = (unsigned char *) ROUNDUP32(ppi->frame_buffer_alloc);

    ppi->fixed_divide_alloc = vpx_malloc(32 + 255 * sizeof(unsigned int));

    if (!ppi->fixed_divide_alloc)
    {
        delete_pre_proc(ppi);
        return 0;
    }

    ppi->fixed_divide = (unsigned int *) ROUNDUP32(ppi->fixed_divide_alloc);

    /* Entry 0 has no reciprocal; give it a defined value instead of leaving
     * whatever the allocator returned. */
    ppi->fixed_divide[0] = 0;

    for (i = 1; i < 255; i++)
        ppi->fixed_divide[i] = 0x10000 / i;

    return 1;
}

View File

@@ -16,9 +16,8 @@
#include "entropy.h"
#include "predictdc.h"
#define EXACT_QUANT
#ifdef EXACT_FASTQUANT
//#define EXACT_QUANT
#ifdef EXACT_QUANT
void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
{
int i, rc, eob;
@@ -27,7 +26,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
short *coeff_ptr = b->coeff;
short *zbin_ptr = b->zbin;
short *round_ptr = b->round;
short *quant_ptr = b->quant_fast;
short *quant_ptr = b->quant;
short *quant_shift_ptr = b->quant_shift;
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
@@ -65,45 +64,6 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
d->eob = eob + 1;
}
#else
// Fast quantizer for one block of 16 coefficients.
//
// Walks the coefficients in vp8_default_zig_zag1d order.  For each one:
// take the magnitude, add the rounding term, multiply by the b->quant_fast
// factor and keep the top bits (>> 16), then restore the sign.  The result is
// stored in d->qcoeff and its product with d->dequant in d->dqcoeff.
// d->eob is set to one past the zig-zag position of the last nonzero
// quantized coefficient (0 when the whole block quantizes to zero).
void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
{
    int i, rc, eob;
    int x, y, z, sz;
    short *coeff_ptr = b->coeff;
    short *round_ptr = b->round;
    short *quant_ptr = b->quant_fast;
    short *qcoeff_ptr = d->qcoeff;
    short *dqcoeff_ptr = d->dqcoeff;
    short *dequant_ptr = d->dequant;

    eob = -1;

    for (i = 0; i < 16; i++)
    {
        rc = vp8_default_zig_zag1d[i];
        z = coeff_ptr[rc];

        // Sign mask: -1 (all ones) for negative z, 0 otherwise.  Written as a
        // comparison because right-shifting a negative int is
        // implementation-defined.
        sz = -(z < 0);
        x = (z ^ sz) - sz;                               // x = abs(z)

        y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
        x = (y ^ sz) - sz;                               // get the sign back
        qcoeff_ptr[rc] = x;                              // write to destination
        dqcoeff_ptr[rc] = x * dequant_ptr[rc];           // dequantized value

        if (y)
        {
            eob = i;                                     // last nonzero coeffs
        }
    }

    d->eob = eob + 1;
}
#endif
#ifdef EXACT_QUANT
void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
{
int i, rc, eob;
@@ -218,6 +178,39 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
}
#else
// Fast quantizer for one block of 16 coefficients.
//
// Walks the coefficients in vp8_default_zig_zag1d order.  For each one:
// take the magnitude, add the rounding term, multiply by the b->quant
// factor and keep the top bits (>> 16), then restore the sign.  The result is
// stored in d->qcoeff and its product with d->dequant in d->dqcoeff.
// d->eob is set to one past the zig-zag position of the last nonzero
// quantized coefficient (0 when the whole block quantizes to zero).
void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
{
    int i, rc, eob;
    int x, y, z, sz;
    short *coeff_ptr = b->coeff;
    short *round_ptr = b->round;
    short *quant_ptr = b->quant;
    short *qcoeff_ptr = d->qcoeff;
    short *dqcoeff_ptr = d->dqcoeff;
    short *dequant_ptr = d->dequant;

    eob = -1;

    for (i = 0; i < 16; i++)
    {
        rc = vp8_default_zig_zag1d[i];
        z = coeff_ptr[rc];

        // Sign mask: -1 (all ones) for negative z, 0 otherwise.  Written as a
        // comparison because right-shifting a negative int is
        // implementation-defined.
        sz = -(z < 0);
        x = (z ^ sz) - sz;                               // x = abs(z)

        y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
        x = (y ^ sz) - sz;                               // get the sign back
        qcoeff_ptr[rc] = x;                              // write to destination
        dqcoeff_ptr[rc] = x * dequant_ptr[rc];           // dequantized value

        if (y)
        {
            eob = i;                                     // last nonzero coeffs
        }
    }

    d->eob = eob + 1;
}
void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
{

View File

@@ -45,48 +45,46 @@ extern int inter_b_modes[10];
// Bits Per MB at different Q (Multiplied by 512)
#define BPER_MB_NORMBITS 9
// Work in progress recalibration of baseline rate tables based on
// the assumption that bits per mb is inversely proportional to the
// quantizer value.
const int vp8_bits_per_mb[2][QINDEX_RANGE] =
{
// Intra case 450000/Qintra
// (Updated 19 March 08) Baseline estimate of INTRA-frame Bits Per MB at each Q:
{
1125000,900000, 750000, 642857, 562500, 500000, 450000, 450000,
409090, 375000, 346153, 321428, 300000, 281250, 264705, 264705,
250000, 236842, 225000, 225000, 214285, 214285, 204545, 204545,
195652, 195652, 187500, 180000, 180000, 173076, 166666, 160714,
155172, 150000, 145161, 140625, 136363, 132352, 128571, 125000,
121621, 121621, 118421, 115384, 112500, 109756, 107142, 104651,
102272, 100000, 97826, 97826, 95744, 93750, 91836, 90000,
88235, 86538, 84905, 83333, 81818, 80357, 78947, 77586,
76271, 75000, 73770, 72580, 71428, 70312, 69230, 68181,
67164, 66176, 65217, 64285, 63380, 62500, 61643, 60810,
60000, 59210, 59210, 58441, 57692, 56962, 56250, 55555,
54878, 54216, 53571, 52941, 52325, 51724, 51136, 50561,
49450, 48387, 47368, 46875, 45918, 45000, 44554, 44117,
43269, 42452, 41666, 40909, 40178, 39473, 38793, 38135,
36885, 36290, 35714, 35156, 34615, 34090, 33582, 33088,
32608, 32142, 31468, 31034, 30405, 29801, 29220, 28662,
674781, 606845, 553905, 524293, 500428, 452540, 435379, 414719,
390970, 371082, 359416, 341807, 336957, 317263, 303724, 298402,
285688, 275237, 268455, 262560, 256038, 248734, 241087, 237615,
229247, 225211, 219112, 213920, 211559, 202714, 198482, 193401,
187866, 183453, 179212, 175965, 171852, 167235, 163972, 160560,
156032, 154349, 151390, 148725, 145708, 142311, 139981, 137700,
134084, 131863, 129746, 128498, 126077, 123461, 121290, 117782,
114883, 112332, 108410, 105685, 103434, 101192, 98587, 95959,
94059, 92017, 89970, 87936, 86142, 84801, 82736, 81106,
79668, 78135, 76641, 75103, 73943, 72693, 71401, 70098,
69165, 67901, 67170, 65987, 64923, 63534, 62378, 61302,
59921, 58941, 57844, 56782, 55960, 54973, 54257, 53454,
52230, 50938, 49962, 49190, 48288, 47270, 46738, 46037,
45020, 44027, 43216, 42287, 41594, 40702, 40081, 39414,
38282, 37627, 36987, 36375, 35808, 35236, 34710, 34162,
33659, 33327, 32751, 32384, 31936, 31461, 30982, 30582,
},
// Inter case 285000/Qinter
// (Updated 19 March 08) Baseline estimate of INTER-frame Bits Per MB at each Q:
{
712500, 570000, 475000, 407142, 356250, 316666, 285000, 259090,
237500, 219230, 203571, 190000, 178125, 167647, 158333, 150000,
142500, 135714, 129545, 123913, 118750, 114000, 109615, 105555,
101785, 98275, 95000, 91935, 89062, 86363, 83823, 81428,
79166, 77027, 75000, 73076, 71250, 69512, 67857, 66279,
64772, 63333, 61956, 60638, 59375, 58163, 57000, 55882,
54807, 53773, 52777, 51818, 50892, 50000, 49137, 47500,
45967, 44531, 43181, 41911, 40714, 39583, 38513, 37500,
36538, 35625, 34756, 33928, 33139, 32386, 31666, 30978,
30319, 29687, 29081, 28500, 27941, 27403, 26886, 26388,
25909, 25446, 25000, 24568, 23949, 23360, 22800, 22265,
21755, 21268, 20802, 20357, 19930, 19520, 19127, 18750,
18387, 18037, 17701, 17378, 17065, 16764, 16473, 16101,
15745, 15405, 15079, 14766, 14467, 14179, 13902, 13636,
13380, 13133, 12895, 12666, 12445, 12179, 11924, 11632,
11445, 11220, 11003, 10795, 10594, 10401, 10215, 10035,
497401, 426316, 372064, 352732, 335763, 283921, 273848, 253321,
233181, 217727, 210030, 196685, 194836, 178396, 167753, 164116,
154119, 146929, 142254, 138488, 133591, 127741, 123166, 120226,
114188, 111756, 107882, 104749, 102522, 96451, 94424, 90905,
87286, 84931, 82111, 80534, 77610, 74700, 73037, 70715,
68006, 67235, 65374, 64009, 62134, 60180, 59105, 57691,
55509, 54512, 53318, 52693, 51194, 49840, 48944, 46980,
45668, 44177, 42348, 40994, 39859, 38889, 37717, 36391,
35482, 34622, 33795, 32756, 32002, 31492, 30573, 29737,
29152, 28514, 27941, 27356, 26859, 26329, 25874, 25364,
24957, 24510, 24290, 23689, 23380, 22845, 22481, 22066,
21587, 21219, 20880, 20452, 20260, 19926, 19661, 19334,
18915, 18391, 18046, 17833, 17441, 17105, 16888, 16729,
16383, 16023, 15706, 15442, 15222, 14938, 14673, 14452,
14005, 13807, 13611, 13447, 13223, 13102, 12963, 12801,
12627, 12534, 12356, 12228, 12056, 11907, 11746, 11643,
}
};
@@ -326,7 +324,6 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
cpi->frames_till_gf_update_due = cpi->goldfreq;
cpi->common.refresh_golden_frame = TRUE;
cpi->common.refresh_alt_ref_frame = TRUE;
}
void vp8_calc_auto_iframe_target_size(VP8_COMP *cpi)
@@ -1037,7 +1034,9 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
gf_frame_useage = pct_gf_active;
// Is a fixed manual GF frequency being used
if (cpi->auto_gold)
if (!cpi->auto_gold)
cpi->common.refresh_golden_frame = TRUE;
else
{
// For one pass throw a GF if recent frame intra useage is low or the GF useage is high
if ((cpi->pass == 0) && (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5))

File diff suppressed because it is too large Load Diff

View File

@@ -126,24 +126,6 @@ void vp8_sad16x16x3_c(
sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
}
// Fills sad_array[0..7] with the results of vp8_sad16x16_c for the source
// block against eight reference positions, ref_ptr + 0 through ref_ptr + 7.
// Each result is narrowed to unsigned short.  The final argument 0x7fffffff
// is passed through unchanged (presumably an "unlimited" cut-off; confirm
// against vp8_sad16x16_c).
void vp8_sad16x16x8_c(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride,
    unsigned short *sad_array
)
{
    int shift;

    for (shift = 0; shift < 8; shift++)
    {
        sad_array[shift] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + shift, ref_stride, 0x7fffffff);
    }
}
void vp8_sad16x8x3_c(
const unsigned char *src_ptr,
int src_stride,
@@ -157,24 +139,6 @@ void vp8_sad16x8x3_c(
sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
}
// Fills sad_array[0..7] with the results of vp8_sad16x8_c for the source
// block against eight reference positions, ref_ptr + 0 through ref_ptr + 7.
// Each result is narrowed to unsigned short.  The final argument 0x7fffffff
// is passed through unchanged (presumably an "unlimited" cut-off; confirm
// against vp8_sad16x8_c).
void vp8_sad16x8x8_c(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride,
    unsigned short *sad_array
)
{
    int shift;

    for (shift = 0; shift < 8; shift++)
    {
        sad_array[shift] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + shift, ref_stride, 0x7fffffff);
    }
}
void vp8_sad8x8x3_c(
const unsigned char *src_ptr,
int src_stride,
@@ -188,24 +152,6 @@ void vp8_sad8x8x3_c(
sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
}
// Fills sad_array[0..7] with the results of vp8_sad8x8_c for the source
// block against eight reference positions, ref_ptr + 0 through ref_ptr + 7.
// Each result is narrowed to unsigned short.  The final argument 0x7fffffff
// is passed through unchanged (presumably an "unlimited" cut-off; confirm
// against vp8_sad8x8_c).
void vp8_sad8x8x8_c(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride,
    unsigned short *sad_array
)
{
    int shift;

    for (shift = 0; shift < 8; shift++)
    {
        sad_array[shift] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + shift, ref_stride, 0x7fffffff);
    }
}
void vp8_sad8x16x3_c(
const unsigned char *src_ptr,
int src_stride,
@@ -219,24 +165,6 @@ void vp8_sad8x16x3_c(
sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
}
/* Fills sad_array[0..7] with the result of vp8_sad8x16_c() evaluated against
 * ref_ptr + 0 .. ref_ptr + 7 (eight consecutive horizontal positions), each
 * result cast to unsigned short.
 *
 * src_ptr / src_stride : source block and its stride, passed through unchanged
 * ref_ptr / ref_stride : first reference position and its stride
 * sad_array            : receives the eight results
 *
 * 0x7fffffff is passed as the last argument of every call (presumably a
 * "never stop early" limit -- confirm against vp8_sad8x16_c).
 */
void vp8_sad8x16x8_c(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride,
    unsigned short *sad_array
)
{
    int col;

    for (col = 0; col < 8; col++)
    {
        const unsigned char *candidate = ref_ptr + col;

        sad_array[col] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, candidate, ref_stride, 0x7fffffff);
    }
}
void vp8_sad4x4x3_c(
const unsigned char *src_ptr,
int src_stride,
@@ -250,24 +178,6 @@ void vp8_sad4x4x3_c(
sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
}
/* Fills sad_array[0..7] with the result of vp8_sad4x4_c() evaluated against
 * ref_ptr + 0 .. ref_ptr + 7 (eight consecutive horizontal positions), each
 * result cast to unsigned short.
 *
 * src_ptr / src_stride : source block and its stride, passed through unchanged
 * ref_ptr / ref_stride : first reference position and its stride
 * sad_array            : receives the eight results
 *
 * 0x7fffffff is passed as the last argument of every call (presumably a
 * "never stop early" limit -- confirm against vp8_sad4x4_c).
 */
void vp8_sad4x4x8_c(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride,
    unsigned short *sad_array
)
{
    int shift = 0;

    while (shift < 8)
    {
        sad_array[shift] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + shift, ref_stride, 0x7fffffff);
        shift++;
    }
}
void vp8_sad16x16x4d_c(
const unsigned char *src_ptr,
int src_stride,

View File

@@ -36,37 +36,30 @@
#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
#define USE_FILTER_LUT 0 // use lookup table to improve filter
#define USE_FILTER_LUT 1
#if VP8_TEMPORAL_ALT_REF
#if USE_FILTER_LUT
// for (strength = 0; strength <= 6; strength++) {
// for (delta = 0; delta <= 18; delta++) {
// float coeff = (3.0 * delta * delta) / pow(2, strength);
// printf("%3d", (int)roundf(coeff > 16 ? 0 : 16-coeff));
// }
// printf("\n");
// }
static int modifier_lut[7][19] =
{
// Strength=0
{16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// Strength=1
{16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// Strength=2
{16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// Strength=3
{16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// Strength=4
{16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// Strength=5
{16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
{16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
// Strength=6
{16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
{16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
};
#endif
static void vp8_temporal_filter_predictors_mb_c
static void build_predictors_mb
(
MACROBLOCKD *x,
unsigned char *y_mb_ptr,
@@ -118,7 +111,7 @@ static void vp8_temporal_filter_predictors_mb_c
RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
}
}
void vp8_temporal_filter_apply_c
static void apply_temporal_filter
(
unsigned char *frame1,
unsigned int stride,
@@ -147,14 +140,16 @@ void vp8_temporal_filter_apply_c
int pixel_value = *frame2++;
#if USE_FILTER_LUT
// LUT implementation --
// improves precision of filter
modifier = abs(src_byte-pixel_value);
modifier = modifier>18 ? 0 : lut[modifier];
#else
modifier = src_byte - pixel_value;
modifier = src_byte;
modifier -= pixel_value;
modifier *= modifier;
modifier *= 3;
modifier += 1 << (strength - 1);
modifier >>= strength;
modifier *= 3;
if (modifier > 16)
modifier = 16;
@@ -176,7 +171,7 @@ void vp8_temporal_filter_apply_c
#if ALT_REF_MC_ENABLED
static int dummy_cost[2*mv_max+1];
static int vp8_temporal_filter_find_matching_mb_c
static int find_matching_mb
(
VP8_COMP *cpi,
YV12_BUFFER_CONFIG *arf_frame,
@@ -251,7 +246,7 @@ static int vp8_temporal_filter_find_matching_mb_c
step_param,
sadpb / 2/*x->errorperbit*/,
&num00, &cpi->fn_ptr[BLOCK_16X16],
mvsadcost, mvcost, &best_ref_mv1); //sadpb < 9
mvsadcost, mvcost); //sadpb < 9
// Further step/diamond searches as necessary
n = 0;
@@ -273,7 +268,7 @@ static int vp8_temporal_filter_find_matching_mb_c
step_param + n,
sadpb / 4/*x->errorperbit*/,
&num00, &cpi->fn_ptr[BLOCK_16X16],
mvsadcost, mvcost, &best_ref_mv1); //sadpb = 9
mvsadcost, mvcost); //sadpb = 9
if (thissme < bestsme)
{
@@ -297,7 +292,7 @@ static int vp8_temporal_filter_find_matching_mb_c
bestsme = cpi->find_fractional_mv_step(x, b, d,
&d->bmi.mv.as_mv, &best_ref_mv1,
x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
mvcost);
cpi->mb.mvcost);
}
#endif
@@ -313,7 +308,7 @@ static int vp8_temporal_filter_find_matching_mb_c
}
#endif
static void vp8_temporal_filter_iterate_c
static void vp8cx_temp_blur1_c
(
VP8_COMP *cpi,
int frame_count,
@@ -417,12 +412,11 @@ static void vp8_temporal_filter_iterate_c
#define THRESH_HIGH 20000
// Correlation has been lost try MC
err = vp8_temporal_filter_find_matching_mb_c
(cpi,
cpi->frames[alt_ref_index],
cpi->frames[frame],
mb_y_offset,
THRESH_LOW);
err = find_matching_mb ( cpi,
cpi->frames[alt_ref_index],
cpi->frames[frame],
mb_y_offset,
THRESH_LOW );
if (filter_weight[frame] < 2)
{
@@ -435,46 +429,43 @@ static void vp8_temporal_filter_iterate_c
if (filter_weight[frame] != 0)
{
// Construct the predictors
vp8_temporal_filter_predictors_mb_c
(mbd,
cpi->frames[frame]->y_buffer + mb_y_offset,
cpi->frames[frame]->u_buffer + mb_uv_offset,
cpi->frames[frame]->v_buffer + mb_uv_offset,
cpi->frames[frame]->y_stride,
mbd->block[0].bmi.mv.as_mv.row,
mbd->block[0].bmi.mv.as_mv.col,
predictor);
build_predictors_mb (
mbd,
cpi->frames[frame]->y_buffer + mb_y_offset,
cpi->frames[frame]->u_buffer + mb_uv_offset,
cpi->frames[frame]->v_buffer + mb_uv_offset,
cpi->frames[frame]->y_stride,
mbd->block[0].bmi.mv.as_mv.row,
mbd->block[0].bmi.mv.as_mv.col,
predictor );
// Apply the filter (YUV)
TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
(f->y_buffer + mb_y_offset,
f->y_stride,
predictor,
16,
strength,
filter_weight[frame],
accumulator,
count);
apply_temporal_filter ( f->y_buffer + mb_y_offset,
f->y_stride,
predictor,
16,
strength,
filter_weight[frame],
accumulator,
count );
TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
(f->u_buffer + mb_uv_offset,
f->uv_stride,
predictor + 256,
8,
strength,
filter_weight[frame],
accumulator + 256,
count + 256);
apply_temporal_filter ( f->u_buffer + mb_uv_offset,
f->uv_stride,
predictor + 256,
8,
strength,
filter_weight[frame],
accumulator + 256,
count + 256 );
TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
(f->v_buffer + mb_uv_offset,
f->uv_stride,
predictor + 320,
8,
strength,
filter_weight[frame],
accumulator + 320,
count + 320);
apply_temporal_filter ( f->v_buffer + mb_uv_offset,
f->uv_stride,
predictor + 320,
8,
strength,
filter_weight[frame],
accumulator + 320,
count + 320 );
}
}
@@ -543,7 +534,7 @@ static void vp8_temporal_filter_iterate_c
mbd->pre.v_buffer = v_buffer;
}
void vp8_temporal_filter_prepare_c
void vp8cx_temp_filter_c
(
VP8_COMP *cpi
)
@@ -651,7 +642,7 @@ void vp8_temporal_filter_prepare_c
= &cpi->src_buffer[which_buffer].source_buffer;
}
vp8_temporal_filter_iterate_c (
vp8cx_temp_blur1_c (
cpi,
frames_to_blur,
frames_to_blur_backward,

View File

@@ -12,33 +12,8 @@
#ifndef __INC_VP8_TEMPORAL_FILTER_H
#define __INC_VP8_TEMPORAL_FILTER_H
// prototype_apply(sym) expands to the function signature
//   void sym(unsigned char *frame1, unsigned int stride,
//            unsigned char *frame2, unsigned int block_size,
//            int strength, int filter_weight,
//            unsigned int *accumulator, unsigned int *count)
// It is used in this header both for an extern function declaration and,
// with a "*name" argument, for a function-pointer struct member.
#define prototype_apply(sym)\
void (sym) \
( \
unsigned char *frame1, \
unsigned int stride, \
unsigned char *frame2, \
unsigned int block_size, \
int strength, \
int filter_weight, \
unsigned int *accumulator, \
unsigned int *count \
)
#include "onyx_int.h"
#ifndef vp8_temporal_filter_apply
#define vp8_temporal_filter_apply vp8_temporal_filter_apply_c
#endif
extern prototype_apply(vp8_temporal_filter_apply);
typedef struct
{
prototype_apply(*apply);
} vp8_temporal_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
#define TEMPORAL_INVOKE(ctx,fn) (ctx)->fn
#else
#define TEMPORAL_INVOKE(ctx,fn) vp8_temporal_filter_##fn
#endif
void vp8cx_temp_filter_c(VP8_COMP *cpi);
#endif // __INC_VP8_TEMPORAL_FILTER_H

View File

@@ -132,6 +132,8 @@ static void tokenize2nd_order_b
t->Token = x;
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
t->section = frametype * BLOCK_TYPES * 2 + 2 * type + (c == 0);
t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0));
++cpi->coef_counts [type] [band] [pt] [x];
@@ -183,6 +185,7 @@ static void tokenize1st_order_b
t->Token = x;
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
t->section = frametype * BLOCK_TYPES * 2 + 2 * type + (c == 0);
t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0));
++cpi->coef_counts [type] [band] [pt] [x];
@@ -431,6 +434,7 @@ static __inline void stuff2nd_order_b
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];
t->section = 11;
t->skip_eob_node = 0;
++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN];
++t;
@@ -461,6 +465,7 @@ static __inline void stuff1st_order_b
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [0] [1] [pt];
t->section = 8;
t->skip_eob_node = 0;
++cpi->coef_counts [0] [1] [pt] [DCT_EOB_TOKEN];
++t;
@@ -490,6 +495,7 @@ void stuff1st_order_buv
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];
t->section = 13;
t->skip_eob_node = 0;
++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN];
++t;

View File

@@ -25,10 +25,11 @@ typedef struct
typedef struct
{
int Token;
int Extra;
const vp8_prob *context_tree;
short Extra;
unsigned char Token;
unsigned char skip_eob_node;
int skip_eob_node;
int section;
} TOKENEXTRA;
int rd_cost_mby(MACROBLOCKD *);

View File

@@ -32,16 +32,6 @@
unsigned int *sad_array\
)
// prototype_sad_multi_same_address_1(sym) expands to the function signature
//   void sym(const unsigned char *src_ptr, int source_stride,
//            const unsigned char *ref_ptr, int ref_stride,
//            unsigned short *sad_array)
// Results are written through an unsigned short pointer. The macro is used
// in this header for extern declarations and, with a "*name" argument, for a
// function-pointer typedef.
#define prototype_sad_multi_same_address_1(sym)\
void (sym)\
(\
const unsigned char *src_ptr, \
int source_stride, \
const unsigned char *ref_ptr, \
int ref_stride, \
unsigned short *sad_array\
)
#define prototype_sad_multi_dif_address(sym)\
void (sym)\
(\
@@ -148,31 +138,6 @@ extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3);
#endif
extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3);
#ifndef vp8_variance_sad16x16x8
#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c
#endif
extern prototype_sad_multi_same_address_1(vp8_variance_sad16x16x8);
#ifndef vp8_variance_sad16x8x8
#define vp8_variance_sad16x8x8 vp8_sad16x8x8_c
#endif
extern prototype_sad_multi_same_address_1(vp8_variance_sad16x8x8);
#ifndef vp8_variance_sad8x8x8
#define vp8_variance_sad8x8x8 vp8_sad8x8x8_c
#endif
extern prototype_sad_multi_same_address_1(vp8_variance_sad8x8x8);
#ifndef vp8_variance_sad8x16x8
#define vp8_variance_sad8x16x8 vp8_sad8x16x8_c
#endif
extern prototype_sad_multi_same_address_1(vp8_variance_sad8x16x8);
#ifndef vp8_variance_sad4x4x8
#define vp8_variance_sad4x4x8 vp8_sad4x4x8_c
#endif
extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8);
//-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#ifndef vp8_variance_sad16x16x4d
@@ -309,7 +274,6 @@ extern prototype_sad(vp8_variance_get4x4sse_cs);
typedef prototype_sad(*vp8_sad_fn_t);
typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t);
typedef prototype_variance(*vp8_variance_fn_t);
typedef prototype_variance2(*vp8_variance2_fn_t);
@@ -353,12 +317,6 @@ typedef struct
vp8_sad_multi_fn_t sad8x8x3;
vp8_sad_multi_fn_t sad4x4x3;
vp8_sad_multi1_fn_t sad16x16x8;
vp8_sad_multi1_fn_t sad16x8x8;
vp8_sad_multi1_fn_t sad8x16x8;
vp8_sad_multi1_fn_t sad8x8x8;
vp8_sad_multi1_fn_t sad4x4x8;
vp8_sad_multi_d_fn_t sad16x16x4d;
vp8_sad_multi_d_fn_t sad16x8x4d;
vp8_sad_multi_d_fn_t sad8x16x4d;
@@ -376,7 +334,6 @@ typedef struct
vp8_variance_fn_t svf_halfpix_v;
vp8_variance_fn_t svf_halfpix_hv;
vp8_sad_multi_fn_t sdx3f;
vp8_sad_multi1_fn_t sdx8f;
vp8_sad_multi_d_fn_t sdx4df;
} vp8_variance_fn_ptr_t;

View File

@@ -11,231 +11,511 @@
%include "vpx_ports/x86_abi_support.asm"
section .text
global sym(vp8_short_fdct4x4_mmx)
global sym(vp8_short_fdct8x4_wmt)
%define DCTCONSTANTSBITS (16)
%define DCTROUNDINGVALUE (1<< (DCTCONSTANTSBITS-1))
%define x_c1 (60547) ; cos(pi /8) * (1<<15)
%define x_c2 (46341) ; cos(pi*2/8) * (1<<15)
%define x_c3 (25080) ; cos(pi*3/8) * (1<<15)
;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch)
global sym(vp8_short_fdct4x4_mmx)
sym(vp8_short_fdct4x4_mmx):
push rbp
mov rbp, rsp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 3
GET_GOT rbx
push rsi
push rdi
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;input
mov rdi, arg(1) ;output
mov rsi, arg(0) ; input
mov rdi, arg(1) ; output
lea rdx, [GLOBAL(dct_const_mmx)]
movsxd rax, dword ptr arg(2) ;pitch
movsxd rax, dword ptr arg(2) ;pitch
lea rcx, [rsi + rax*2]
lea rcx, [rsi + rax*2]
; read the input data
movq mm0, [rsi]
movq mm1, [rsi + rax]
movq mm0, [rsi]
movq mm1, [rsi + rax ]
movq mm2, [rcx]
movq mm4, [rcx + rax]
movq mm2, [rcx]
movq mm3, [rcx + rax]
; get the constants
;shift to left by 1 for prescision
psllw mm0, 3
psllw mm1, 3
; transpose for the first stage
movq mm3, mm0 ; 00 01 02 03
movq mm5, mm2 ; 20 21 22 23
psllw mm2, 3
psllw mm3, 3
punpcklwd mm0, mm1 ; 00 10 01 11
punpckhwd mm3, mm1 ; 02 12 03 13
; transpose for the second stage
movq mm4, mm0 ; 00 01 02 03
movq mm5, mm2 ; 10 11 12 03
punpcklwd mm2, mm4 ; 20 30 21 31
punpckhwd mm5, mm4 ; 22 32 23 33
punpcklwd mm0, mm1 ; 00 10 01 11
punpckhwd mm4, mm1 ; 02 12 03 13
movq mm1, mm0 ; 00 10 01 11
punpckldq mm0, mm2 ; 00 10 20 30
punpcklwd mm2, mm3 ; 20 30 21 31
punpckhwd mm5, mm3 ; 22 32 23 33
punpckhdq mm1, mm2 ; 01 11 21 31
movq mm2, mm3 ; 02 12 03 13
punpckldq mm2, mm5 ; 02 12 22 32
movq mm1, mm0 ; 00 10 01 11
punpckldq mm0, mm2 ; 00 10 20 30
punpckhdq mm3, mm5 ; 03 13 23 33
punpckhdq mm1, mm2 ; 01 11 21 31
movq mm2, mm4 ; 02 12 03 13
punpckldq mm2, mm5 ; 02 12 22 32
punpckhdq mm4, mm5 ; 03 13 23 33
movq mm3, mm4
; mm0 0
; mm1 1
; mm2 2
; mm3 3
; first stage
movq mm5, mm0
movq mm4, mm1
movq mm5, mm0
movq mm4, mm1
paddw mm0, mm3 ; a1 = 0 + 3
paddw mm1, mm2 ; b1 = 1 + 2
paddw mm0, mm3 ; a = 0 + 3
paddw mm1, mm2 ; b = 1 + 2
psubw mm4, mm2 ; c1 = 1 - 2
psubw mm5, mm3 ; d1 = 0 - 3
psubw mm4, mm2 ; c = 1 - 2
psubw mm5, mm3 ; d = 0 - 3
psllw mm5, 3
psllw mm4, 3
psllw mm0, 3
psllw mm1, 3
; output 0 and 2
movq mm2, mm0 ; a1
movq mm6, [rdx + 16] ; c2
movq mm2, mm0 ; a
paddw mm0, mm1 ; op[0] = a1 + b1
psubw mm2, mm1 ; op[2] = a1 - b1
paddw mm0, mm1 ; a + b
psubw mm2, mm1 ; a - b
movq mm1, mm0 ; a + b
pmulhw mm0, mm6 ; 00 01 02 03
paddw mm0, mm1 ; output 00 01 02 03
pmulhw mm6, mm2 ; 20 21 22 23
paddw mm2, mm6 ; output 20 21 22 23
; output 1 and 3
; interleave c1, d1
movq mm1, mm5 ; d1
punpcklwd mm1, mm4 ; c1 d1
punpckhwd mm5, mm4 ; c1 d1
movq mm6, [rdx + 8] ; c1
movq mm7, [rdx + 24] ; c3
movq mm3, mm1
movq mm4, mm5
movq mm1, mm4 ; c
movq mm3, mm5 ; d
pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
pmulhw mm1, mm7 ; c * c3
pmulhw mm3, mm6 ; d * c1
pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
paddw mm3, mm5 ; d * c1 rounded
paddw mm1, mm3 ; output 10 11 12 13
paddd mm1, MMWORD PTR[GLOBAL(_14500)]
paddd mm4, MMWORD PTR[GLOBAL(_14500)]
paddd mm3, MMWORD PTR[GLOBAL(_7500)]
paddd mm5, MMWORD PTR[GLOBAL(_7500)]
movq mm3, mm4 ; c
pmulhw mm5, mm7 ; d * c3
psrad mm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
psrad mm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
psrad mm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
psrad mm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
pmulhw mm4, mm6 ; c * c1
paddw mm3, mm4 ; round c* c1
psubw mm5, mm3 ; output 30 31 32 33
movq mm3, mm5
packssdw mm1, mm4 ; op[1]
packssdw mm3, mm5 ; op[3]
; done with vertical
; transpose for the second stage
movq mm4, mm0 ; 00 10 20 30
movq mm5, mm2 ; 02 12 22 32
movq mm4, mm0 ; 00 01 02 03
movq mm5, mm2 ; 10 11 12 03
punpcklwd mm0, mm1 ; 00 01 10 11
punpckhwd mm4, mm1 ; 20 21 30 31
punpcklwd mm0, mm1 ; 00 10 01 11
punpckhwd mm4, mm1 ; 02 12 03 13
punpcklwd mm2, mm3 ; 02 03 12 13
punpckhwd mm5, mm3 ; 22 23 32 33
punpcklwd mm2, mm3 ; 20 30 21 31
punpckhwd mm5, mm3 ; 22 32 23 33
movq mm1, mm0 ; 00 01 10 11
punpckldq mm0, mm2 ; 00 01 02 03
punpckhdq mm1, mm2 ; 01 22 12 13
movq mm1, mm0 ; 00 10 01 11
punpckldq mm0, mm2 ; 00 10 20 30
movq mm2, mm4 ; 20 31 30 31
punpckldq mm2, mm5 ; 20 21 22 23
punpckhdq mm1, mm2 ; 01 11 21 31
punpckhdq mm4, mm5 ; 30 31 32 33
movq mm2, mm4 ; 02 12 03 13
punpckldq mm2, mm5 ; 02 12 22 32
; mm0 0
; mm1 1
; mm2 2
; mm3 4
punpckhdq mm4, mm5 ; 03 13 23 33
movq mm3, mm4
movq mm5, mm0
movq mm3, mm1
paddw mm0, mm4 ; a1 = 0 + 3
paddw mm1, mm2 ; b1 = 1 + 2
; first stage
movq mm5, mm0
movq mm4, mm1
psubw mm3, mm2 ; c1 = 1 - 2
psubw mm5, mm4 ; d1 = 0 - 3
paddw mm0, mm3 ; a = 0 + 3
paddw mm1, mm2 ; b = 1 + 2
pxor mm6, mm6 ; zero out for compare
psubw mm4, mm2 ; c = 1 - 2
psubw mm5, mm3 ; d = 0 - 3
pcmpeqw mm6, mm5 ; d1 != 0
pandn mm6, MMWORD PTR[GLOBAL(_cmp_mask)] ; clear upper,
; and keep bit 0 of lower
; output 0 and 2
movq mm2, mm0 ; a1
movq mm6, [rdx + 16] ; c2
movq mm2, mm0 ; a
paddw mm0, mm1 ; a + b
paddw mm0, mm1 ; a1 + b1
psubw mm2, mm1 ; a1 - b1
psubw mm2, mm1 ; a - b
paddw mm0, MMWORD PTR[GLOBAL(_7w)]
paddw mm2, MMWORD PTR[GLOBAL(_7w)]
movq mm1, mm0 ; a + b
pmulhw mm0, mm6 ; 00 01 02 03
psraw mm0, 4 ; op[0] = (a1 + b1 + 7)>>4
psraw mm2, 4 ; op[8] = (a1 - b1 + 7)>>4
paddw mm0, mm1 ; output 00 01 02 03
pmulhw mm6, mm2 ; 20 21 22 23
paddw mm2, mm6 ; output 20 21 22 23
movq MMWORD PTR[rdi + 0 ], mm0
movq MMWORD PTR[rdi + 16], mm2
; output 1 and 3
; interleave c1, d1
movq mm1, mm5 ; d1
punpcklwd mm1, mm3 ; c1 d1
punpckhwd mm5, mm3 ; c1 d1
movq mm6, [rdx + 8] ; c1
movq mm7, [rdx + 24] ; c3
movq mm3, mm1
movq mm4, mm5
movq mm1, mm4 ; c
movq mm3, mm5 ; d
pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
pmulhw mm1, mm7 ; c * c3
pmulhw mm3, mm6 ; d * c1
pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
paddw mm3, mm5 ; d * c1 rounded
paddw mm1, mm3 ; output 10 11 12 13
paddd mm1, MMWORD PTR[GLOBAL(_12000)]
paddd mm4, MMWORD PTR[GLOBAL(_12000)]
paddd mm3, MMWORD PTR[GLOBAL(_51000)]
paddd mm5, MMWORD PTR[GLOBAL(_51000)]
movq mm3, mm4 ; c
pmulhw mm5, mm7 ; d * c3
psrad mm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16
psrad mm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16
psrad mm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16
psrad mm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16
pmulhw mm4, mm6 ; c * c1
paddw mm3, mm4 ; round c* c1
packssdw mm1, mm4 ; op[4]
packssdw mm3, mm5 ; op[12]
psubw mm5, mm3 ; output 30 31 32 33
movq mm3, mm5
; done with vertical
paddw mm1, mm6 ; op[4] += (d1!=0)
pcmpeqw mm4, mm4
pcmpeqw mm5, mm5
psrlw mm4, 15
psrlw mm5, 15
movq MMWORD PTR[rdi + 8 ], mm1
movq MMWORD PTR[rdi + 24], mm3
psllw mm4, 2
psllw mm5, 2
; begin epilog
pop rdi
pop rsi
paddw mm0, mm4
paddw mm1, mm5
paddw mm2, mm4
paddw mm3, mm5
psraw mm0, 3
psraw mm1, 3
psraw mm2, 3
psraw mm3, 3
movq [rdi ], mm0
movq [rdi+ 8], mm1
movq [rdi+16], mm2
movq [rdi+24], mm3
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch)
;
; Forward 4-point DCT applied in two passes to an 8-wide by 4-high block of
; 16-bit values, eight words per xmm register.
;   input  : four rows are loaded with movdqa from input, input+pitch,
;            input+2*pitch and input+3*pitch, so pitch is used as a byte
;            offset and each row address must be 16-byte aligned.
;   output : 64 bytes are written with movq; the low four words of each
;            result register go to bytes 0..31 and the high four words to
;            bytes 32..63.
; Each pass is: transpose, butterfly (a, b, c, d), multiply by the constants
; in dct_const_xmm. The final results are rounded as (x + 4) >> 3.
;
; NOTE(review): xmm6 and xmm7 are overwritten below and are not saved or
; restored inside this routine -- confirm whether the target ABI (e.g. Win64)
; requires them to be preserved.
sym(vp8_short_fdct8x4_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 3
GET_GOT rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;input
mov rdi, arg(1) ;output
lea rdx, [GLOBAL(dct_const_xmm)]
movsxd rax, dword ptr arg(2) ;pitch
lea rcx, [rsi + rax*2]
; read the input data
movdqa xmm0, [rsi]
movdqa xmm2, [rsi + rax]
movdqa xmm4, [rcx]
movdqa xmm3, [rcx + rax]
; the constants are read later through rdx (dct_const_xmm)
; shift left by 3 (multiply by 8) for precision
psllw xmm0, 3
psllw xmm2, 3
psllw xmm4, 3
psllw xmm3, 3
; transpose for the first stage
movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07
movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27
punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13
punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17
punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33
punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37
movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13
punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31
punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33
movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17
punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35
punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37
movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33
punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37
punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36
movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31
punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34
punpckhqdq xmm1, xmm4 ; 01 11 21 31 05 15 25 35
; xmm0 0
; xmm1 1
; xmm2 2
; xmm3 3
; first stage
movdqa xmm5, xmm0
movdqa xmm4, xmm1
paddw xmm0, xmm3 ; a = 0 + 3
paddw xmm1, xmm2 ; b = 1 + 2
psubw xmm4, xmm2 ; c = 1 - 2
psubw xmm5, xmm3 ; d = 0 - 3
; output 0 and 2
; c2 (46341) and c1 (60547) do not fit in a signed word, so pmulhw treats
; them as (c - 65536); adding the operand back after pmulhw gives x*c/65536.
; c3 (25080) fits, so products with c3 need no such correction.
movdqa xmm6, [rdx + 32] ; c2
movdqa xmm2, xmm0 ; a
paddw xmm0, xmm1 ; a + b
psubw xmm2, xmm1 ; a - b
movdqa xmm1, xmm0 ; a + b
pmulhw xmm0, xmm6 ; 00 01 02 03
paddw xmm0, xmm1 ; output 00 01 02 03
pmulhw xmm6, xmm2 ; 20 21 22 23
paddw xmm2, xmm6 ; output 20 21 22 23
; output 1 and 3
movdqa xmm6, [rdx + 16] ; c1
movdqa xmm7, [rdx + 48] ; c3
movdqa xmm1, xmm4 ; c
movdqa xmm3, xmm5 ; d
pmulhw xmm1, xmm7 ; c * c3
pmulhw xmm3, xmm6 ; d * c1
paddw xmm3, xmm5 ; d * c1 rounded
paddw xmm1, xmm3 ; output 10 11 12 13
movdqa xmm3, xmm4 ; c
pmulhw xmm5, xmm7 ; d * c3
pmulhw xmm4, xmm6 ; c * c1
paddw xmm3, xmm4 ; round c* c1
psubw xmm5, xmm3 ; output 30 31 32 33
movdqa xmm3, xmm5
; done with vertical
; transpose for the second stage
movdqa xmm4, xmm2 ; 02 12 22 32 06 16 26 36
movdqa xmm2, xmm1 ; 01 11 21 31 05 15 25 35
movdqa xmm1, xmm0 ; 00 10 20 30 04 14 24 34
movdqa xmm5, xmm4 ; 02 12 22 32 06 16 26 36
punpcklwd xmm0, xmm2 ; 00 01 10 11 20 21 30 31
punpckhwd xmm1, xmm2 ; 04 05 14 15 24 25 34 35
punpcklwd xmm4, xmm3 ; 02 03 12 13 22 23 32 33
punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37
movdqa xmm2, xmm0 ; 00 01 10 11 20 21 30 31
punpckldq xmm0, xmm4 ; 00 01 02 03 10 11 12 13
punpckhdq xmm2, xmm4 ; 20 21 22 23 30 31 32 33
movdqa xmm4, xmm1 ; 04 05 14 15 24 25 34 35
punpckldq xmm4, xmm5 ; 04 05 06 07 14 15 16 17
punpckhdq xmm1, xmm5 ; 24 25 26 27 34 35 36 37
movdqa xmm3, xmm2 ; 20 21 22 23 30 31 32 33
punpckhqdq xmm3, xmm1 ; 30 31 32 33 34 35 36 37
punpcklqdq xmm2, xmm1 ; 20 21 22 23 24 25 26 27
movdqa xmm1, xmm0 ; 00 01 02 03 10 11 12 13
punpcklqdq xmm0, xmm4 ; 00 01 02 03 04 05 06 07
punpckhqdq xmm1, xmm4 ; 10 11 12 13 14 15 16 17
; second stage (same butterfly as the first stage, on the transposed data)
movdqa xmm5, xmm0
movdqa xmm4, xmm1
paddw xmm0, xmm3 ; a = 0 + 3
paddw xmm1, xmm2 ; b = 1 + 2
psubw xmm4, xmm2 ; c = 1 - 2
psubw xmm5, xmm3 ; d = 0 - 3
; output 0 and 2
movdqa xmm6, [rdx + 32] ; c2
movdqa xmm2, xmm0 ; a
paddw xmm0, xmm1 ; a + b
psubw xmm2, xmm1 ; a - b
movdqa xmm1, xmm0 ; a + b
pmulhw xmm0, xmm6 ; 00 01 02 03
paddw xmm0, xmm1 ; output 00 01 02 03
pmulhw xmm6, xmm2 ; 20 21 22 23
paddw xmm2, xmm6 ; output 20 21 22 23
; output 1 and 3
movdqa xmm6, [rdx + 16] ; c1
movdqa xmm7, [rdx + 48] ; c3
movdqa xmm1, xmm4 ; c
movdqa xmm3, xmm5 ; d
pmulhw xmm1, xmm7 ; c * c3
pmulhw xmm3, xmm6 ; d * c1
paddw xmm3, xmm5 ; d * c1 rounded
paddw xmm1, xmm3 ; output 10 11 12 13
movdqa xmm3, xmm4 ; c
pmulhw xmm5, xmm7 ; d * c3
pmulhw xmm4, xmm6 ; c * c1
paddw xmm3, xmm4 ; round c* c1
psubw xmm5, xmm3 ; output 30 31 32 33
movdqa xmm3, xmm5
; done with the second stage
; build the rounding constant 4 in every word: (all ones >> 15) << 2
pcmpeqw xmm4, xmm4
pcmpeqw xmm5, xmm5;
psrlw xmm4, 15
psrlw xmm5, 15
psllw xmm4, 2
psllw xmm5, 2
; result = (x + 4) >> 3
paddw xmm0, xmm4
paddw xmm1, xmm5
paddw xmm2, xmm4
paddw xmm3, xmm5
psraw xmm0, 3
psraw xmm1, 3
psraw xmm2, 3
psraw xmm3, 3
; low four words of each register -> output bytes 0..31
movq QWORD PTR[rdi ], xmm0
movq QWORD PTR[rdi+ 8], xmm1
movq QWORD PTR[rdi+16], xmm2
movq QWORD PTR[rdi+24], xmm3
; move the high four words down, then store them to output bytes 32..63
psrldq xmm0, 8
psrldq xmm1, 8
psrldq xmm2, 8
psrldq xmm3, 8
movq QWORD PTR[rdi+32], xmm0
movq QWORD PTR[rdi+40], xmm1
movq QWORD PTR[rdi+48], xmm2
movq QWORD PTR[rdi+56], xmm3
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 8
_5352_2217:
dw 5352
dw 2217
dw 5352
dw 2217
align 8
_2217_neg5352:
dw 2217
dw -5352
dw 2217
dw -5352
align 8
_cmp_mask:
times 4 dw 1
align 8
_7w:
times 4 dw 7
align 8
_14500:
times 2 dd 14500
align 8
_7500:
times 2 dd 7500
align 8
_12000:
times 2 dd 12000
align 8
_51000:
times 2 dd 51000
;static const unsigned int dct1st_stage_rounding_mmx[2] =
align 16
dct1st_stage_rounding_mmx:
times 2 dd 8192
;static const unsigned int dct2nd_stage_rounding_mmx[2] =
align 16
dct2nd_stage_rounding_mmx:
times 2 dd 32768
;static const short dct_matrix[4][4]=
align 16
dct_matrix:
times 4 dw 23170
dw 30274
dw 12540
dw -12540
dw -30274
dw 23170
times 2 dw -23170
dw 23170
dw 12540
dw -30274
dw 30274
dw -12540
;static const unsigned short dct_const_mmx[4 * 4]=
align 16
dct_const_mmx:
times 4 dw 0
times 4 dw 60547
times 4 dw 46341
times 4 dw 25080
;static const unsigned short dct_const_xmm[8 * 4]=
align 16
dct_const_xmm:
times 8 dw 0
times 8 dw 60547
times 8 dw 46341
times 8 dw 25080

View File

@@ -11,68 +11,32 @@
%include "vpx_ports/x86_abi_support.asm"
%macro STACK_FRAME_CREATE 0
%if ABI_IS_32BIT
%define input rsi
%define output rdi
%define pitch rax
;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
global sym(vp8_short_fdct4x4_sse2)
sym(vp8_short_fdct4x4_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 3
;; SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0)
mov rdi, arg(1)
movsxd rax, DWORD PTR arg(2)
lea rdi, [rsi + rax*2]
movsxd rax, dword ptr arg(2)
lea rcx, [rsi + rax*2]
%else
%ifidn __OUTPUT_FORMAT__,x64
%define input rcx
%define output rdx
%define pitch r8
%else
%define input rdi
%define output rsi
%define pitch rdx
%endif
%endif
%endmacro
%macro STACK_FRAME_DESTROY 0
%define input
%define output
%define pitch
%if ABI_IS_32BIT
pop rdi
pop rsi
RESTORE_GOT
pop rbp
%else
%ifidn __OUTPUT_FORMAT__,x64
%endif
%endif
ret
%endmacro
;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
global sym(vp8_short_fdct4x4_sse2)
sym(vp8_short_fdct4x4_sse2):
STACK_FRAME_CREATE
movq xmm0, MMWORD PTR[input ] ;03 02 01 00
movq xmm2, MMWORD PTR[input+ pitch] ;13 12 11 10
lea input, [input+2*pitch]
movq xmm1, MMWORD PTR[input ] ;23 22 21 20
movq xmm3, MMWORD PTR[input+ pitch] ;33 32 31 30
movq xmm0, MMWORD PTR[rsi ] ;03 02 01 00
movq xmm2, MMWORD PTR[rsi + rax] ;13 12 11 10
movq xmm1, MMWORD PTR[rsi + rax*2] ;23 22 21 20
movq xmm3, MMWORD PTR[rdi + rax] ;33 32 31 30
punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00
punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20
mov rdi, arg(1)
movdqa xmm2, xmm0
punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00
punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10
@@ -87,7 +51,6 @@ sym(vp8_short_fdct4x4_sse2):
psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1
psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3
psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3
movdqa xmm1, xmm0
pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
@@ -158,216 +121,17 @@ sym(vp8_short_fdct4x4_sse2):
punpcklqdq xmm0, xmm3 ;op[4] op[0]
punpckhqdq xmm1, xmm3 ;op[12] op[8]
movdqa XMMWORD PTR[output + 0], xmm0
movdqa XMMWORD PTR[output + 16], xmm1
movdqa XMMWORD PTR[rdi + 0], xmm0
movdqa XMMWORD PTR[rdi + 16], xmm1
STACK_FRAME_DESTROY
;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
;
; Forward 4-point DCT applied in two passes to an 8-wide by 4-high block of
; 16-bit values, eight words per xmm register.
;   input  : four rows loaded with movdqa at input, input+pitch,
;            input+2*pitch, input+3*pitch (pitch is used as a byte offset;
;            movdqa requires 16-byte aligned addresses).
;   output : 64 bytes written with movdqa (16-byte aligned).
; First pass : inputs scaled by 8, odd outputs computed with pmaddwd against
;              (5352, 2217) / (2217, -5352), rounded with 14500 / 7500, >> 12.
; Second pass: even outputs are (a1 +/- b1 + 7) >> 4, odd outputs use the same
;              multipliers rounded with 12000 / 51000, >> 16.
;
; NOTE(review): xmm6 is overwritten below and is not saved or restored inside
; this routine -- confirm whether the target ABI (e.g. Win64) requires it to
; be preserved.
global sym(vp8_short_fdct8x4_sse2)
sym(vp8_short_fdct8x4_sse2):
STACK_FRAME_CREATE
; read the input data
movdqa xmm0, [input ]
movdqa xmm2, [input+ pitch]
lea input, [input+2*pitch]
movdqa xmm4, [input ]
movdqa xmm3, [input+ pitch]
; transpose for the first stage
movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07
movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27
punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13
punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17
punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33
punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37
movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13
punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31
punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33
movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17
punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35
punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37
movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33
punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37
punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36
movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31
punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34
punpckhqdq xmm1, xmm4 ; 01 11 21 31 05 15 25 35
; xmm0 0
; xmm1 1
; xmm2 2
; xmm3 3
; first stage
movdqa xmm5, xmm0
movdqa xmm4, xmm1
paddw xmm0, xmm3 ; a1 = 0 + 3
paddw xmm1, xmm2 ; b1 = 1 + 2
psubw xmm4, xmm2 ; c1 = 1 - 2
psubw xmm5, xmm3 ; d1 = 0 - 3
; scale a1, b1, c1, d1 by 8
psllw xmm5, 3
psllw xmm4, 3
psllw xmm0, 3
psllw xmm1, 3
; output 0 and 2
movdqa xmm2, xmm0 ; a1
paddw xmm0, xmm1 ; op[0] = a1 + b1
psubw xmm2, xmm1 ; op[2] = a1 - b1
; output 1 and 3
; interleave c1, d1
movdqa xmm1, xmm5 ; d1
punpcklwd xmm1, xmm4 ; c1 d1
punpckhwd xmm5, xmm4 ; c1 d1
movdqa xmm3, xmm1
movdqa xmm4, xmm5
pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
paddd xmm1, XMMWORD PTR[GLOBAL(_14500)]
paddd xmm4, XMMWORD PTR[GLOBAL(_14500)]
paddd xmm3, XMMWORD PTR[GLOBAL(_7500)]
paddd xmm5, XMMWORD PTR[GLOBAL(_7500)]
psrad xmm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
psrad xmm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
psrad xmm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
psrad xmm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
packssdw xmm1, xmm4 ; op[1]
packssdw xmm3, xmm5 ; op[3]
; done with vertical
; transpose for the second stage
movdqa xmm4, xmm0 ; 00 10 20 30 04 14 24 34
movdqa xmm5, xmm2 ; 02 12 22 32 06 16 26 36
punpcklwd xmm0, xmm1 ; 00 01 10 11 20 21 30 31
punpckhwd xmm4, xmm1 ; 04 05 14 15 24 25 34 35
punpcklwd xmm2, xmm3 ; 02 03 12 13 22 23 32 33
punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37
movdqa xmm1, xmm0 ; 00 01 10 11 20 21 30 31
punpckldq xmm0, xmm2 ; 00 01 02 03 10 11 12 13
punpckhdq xmm1, xmm2 ; 20 21 22 23 30 31 32 33
movdqa xmm2, xmm4 ; 04 05 14 15 24 25 34 35
punpckldq xmm2, xmm5 ; 04 05 06 07 14 15 16 17
punpckhdq xmm4, xmm5 ; 24 25 26 27 34 35 36 37
movdqa xmm3, xmm1 ; 20 21 22 23 30 31 32 33
punpckhqdq xmm3, xmm4 ; 30 31 32 33 34 35 36 37
punpcklqdq xmm1, xmm4 ; 20 21 22 23 24 25 26 27
movdqa xmm4, xmm0 ; 00 01 02 03 10 11 12 13
punpcklqdq xmm0, xmm2 ; 00 01 02 03 04 05 06 07
punpckhqdq xmm4, xmm2 ; 10 11 12 13 14 15 16 17
; register contents after the transpose (row index):
; xmm0 0
; xmm4 1
; xmm1 2
; xmm3 3
movdqa xmm5, xmm0
movdqa xmm2, xmm1
paddw xmm0, xmm3 ; a1 = 0 + 3
paddw xmm1, xmm4 ; b1 = 1 + 2
psubw xmm4, xmm2 ; c1 = 1 - 2
psubw xmm5, xmm3 ; d1 = 0 - 3
pxor xmm6, xmm6 ; zero out for compare
pcmpeqw xmm6, xmm5 ; 0xffff in each word where d1 == 0
pandn xmm6, XMMWORD PTR[GLOBAL(_cmp_mask8x4)] ; mask word kept where d1 != 0,
; zero where d1 == 0
; output 0 and 2
movdqa xmm2, xmm0 ; a1
paddw xmm0, xmm1 ; a1 + b1
psubw xmm2, xmm1 ; a1 - b1
paddw xmm0, XMMWORD PTR[GLOBAL(_7w)]
paddw xmm2, XMMWORD PTR[GLOBAL(_7w)]
psraw xmm0, 4 ; op[0] = (a1 + b1 + 7)>>4
psraw xmm2, 4 ; op[8] = (a1 - b1 + 7)>>4
; output 1 and 3
; interleave c1, d1
movdqa xmm1, xmm5 ; d1
punpcklwd xmm1, xmm4 ; c1 d1
punpckhwd xmm5, xmm4 ; c1 d1
movdqa xmm3, xmm1
movdqa xmm4, xmm5
pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
paddd xmm1, XMMWORD PTR[GLOBAL(_12000)]
paddd xmm4, XMMWORD PTR[GLOBAL(_12000)]
paddd xmm3, XMMWORD PTR[GLOBAL(_51000)]
paddd xmm5, XMMWORD PTR[GLOBAL(_51000)]
psrad xmm1, 16 ; (c1 * 2217 + d1 * 5352 + 12000)>>16
psrad xmm4, 16 ; (c1 * 2217 + d1 * 5352 + 12000)>>16
psrad xmm3, 16 ; (d1 * 2217 - c1 * 5352 + 51000)>>16
psrad xmm5, 16 ; (d1 * 2217 - c1 * 5352 + 51000)>>16
packssdw xmm1, xmm4 ; op[4]
packssdw xmm3, xmm5 ; op[12]
paddw xmm1, xmm6 ; op[4] += (d1!=0)
; regroup the 64-bit halves of the four result registers before storing
movdqa xmm4, xmm0
movdqa xmm5, xmm2
punpcklqdq xmm0, xmm1
punpckhqdq xmm4, xmm1
punpcklqdq xmm2, xmm3
punpckhqdq xmm5, xmm3
movdqa XMMWORD PTR[output + 0 ], xmm0
movdqa XMMWORD PTR[output + 16], xmm2
movdqa XMMWORD PTR[output + 32], xmm4
movdqa XMMWORD PTR[output + 48], xmm5
STACK_FRAME_DESTROY
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
;; RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
@@ -397,9 +161,7 @@ align 16
_cmp_mask:
times 4 dw 1
times 4 dw 0
align 16
_cmp_mask8x4:
times 8 dw 1
align 16
_mult_sub:
dw 1
@@ -414,9 +176,6 @@ align 16
_7:
times 4 dd 7
align 16
_7w:
times 8 dw 7
align 16
_14500:
times 4 dd 14500
align 16

View File

@@ -24,31 +24,33 @@ extern prototype_fdct(vp8_short_fdct4x4_mmx);
extern prototype_fdct(vp8_short_fdct8x4_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
#if 0
#undef vp8_fdct_short4x4
#define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx
#undef vp8_fdct_short8x4
#define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx
#endif
#endif
#endif
#if HAVE_SSE2
extern prototype_fdct(vp8_short_fdct8x4_sse2);
extern prototype_fdct(vp8_short_fdct8x4_wmt);
extern prototype_fdct(vp8_short_walsh4x4_sse2);
extern prototype_fdct(vp8_short_fdct4x4_sse2);
#if !CONFIG_RUNTIME_CPU_DETECT
#if 1
/* short SSE2 DCT currently disabled, does not match the MMX version */
#undef vp8_fdct_short4x4
#define vp8_fdct_short4x4 vp8_short_fdct4x4_sse2
#undef vp8_fdct_short8x4
#define vp8_fdct_short8x4 vp8_short_fdct8x4_sse2
#endif
#undef vp8_fdct_fast4x4
#define vp8_fdct_fast4x4 vp8_short_fdct4x4_sse2
@@ -56,7 +58,7 @@ extern prototype_fdct(vp8_short_fdct4x4_sse2);
#undef vp8_fdct_fast8x4
#define vp8_fdct_fast8x4 vp8_short_fdct8x4_sse2
#undef vp8_fdct_walsh_short4x4
#undef vp8_fdct_walsh_short4x4
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_sse2
#endif

View File

@@ -24,14 +24,5 @@
#endif
#endif
#if HAVE_SSE4_1
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_search_full_search
#define vp8_search_full_search vp8_full_search_sadx8
#endif
#endif
#endif

View File

@@ -0,0 +1,298 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "memory.h"
#include "preproc.h"
#include "pragmas.h"
/****************************************************************************
* Macros
****************************************************************************/
/* Number of history slots kept per pixel group in the temporal filter's
 * frame buffer. */
#define FRAMECOUNT 7
/* Round X up to the next multiple of 32, yielding an unsigned long.
 * X is parenthesized so the cast applies to the whole argument expression
 * (e.g. ROUNDUP32(p + 1)), and the mask is built at unsigned long width so
 * no high bits are dropped where unsigned long is wider than 32 bits. */
#define ROUNDUP32(X) ( ( ( (unsigned long) (X) ) + 31UL ) & ~31UL )
/****************************************************************************
* Imports
****************************************************************************/
extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
/****************************************************************************
* Exported Global Variables
****************************************************************************/
void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
/****************************************************************************
*
* ROUTINE : temp_filter_wmt
*
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
* unsigned char *s : Pointer to source frame.
* unsigned char *d : Pointer to destination frame.
* int bytes : Number of bytes to filter.
* int strength : Strength of filter to apply (used as a right-shift
* count on the squared pixel difference).
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : Performs a closeness-adjusted temporal blur, 8 pixels per
* iteration, using SSE2 (xmm) registers.
*
* SPECIAL NOTES : Destination frame can be same as source frame.
* Both paths are do/while loops stepping by 8, so one group of
* 8 pixels is always processed; presumably bytes is a positive
* multiple of 8 -- TODO confirm against callers.
*
****************************************************************************/
void temp_filter_wmt
(
pre_proc_instance *ppi,
unsigned char *s,
unsigned char *d,
int bytes,
int strength
)
{
int byte = 0;
unsigned char *frameptr = ppi->frame_buffer;
/* Per-lane word constants; 16-byte aligned because they are used as
 * memory operands of pmullw/movdqa below. */
__declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3, 3, 3, 3, 3};
__declspec(align(16)) unsigned short sixteens[] = {16, 16, 16, 16, 16, 16, 16, 16};
if (ppi->frame == 0)
{
/* First frame: seed all FRAMECOUNT history slots of every 8-pixel group
 * with the source pixels and pass the source through unfiltered. */
do
{
int i;
int frame = 0;
do
{
for (i = 0; i < 8; i++)
{
*frameptr = s[byte+i];
++frameptr;
}
++frame;
}
while (frame < FRAMECOUNT);
for (i = 0; i < 8; i++)
d[byte+i] = s[byte+i];
byte += 8;
}
while (byte < bytes);
}
else
{
int i;
/* History slot (0..FRAMECOUNT-1) that the current frame overwrites. */
int offset2 = (ppi->frame % FRAMECOUNT);
do
{
/* Per-pixel weight totals and weighted pixel sums written by the asm. */
__declspec(align(16)) unsigned short counts[8];
__declspec(align(16)) unsigned short sums[8];
__asm
{
mov eax, offset2
mov edi, s // source pixels
pxor xmm1, xmm1 // accumulator (weighted pixel sum)
pxor xmm7, xmm7 // zero, used to widen bytes to words
mov esi, frameptr // history slots for this group of 8 pixels
pxor xmm2, xmm2 // count (sum of weights)
movq xmm3, QWORD PTR [edi]
movq QWORD PTR [esi+8*eax], xmm3 // store current pixels into slot offset2
punpcklbw xmm3, xmm2 // xmm3 source pixels as words (xmm2 is zero here)
mov ecx, FRAMECOUNT
next_frame:
movq xmm4, QWORD PTR [esi] // get frame buffer values
punpcklbw xmm4, xmm7 // xmm4 frame buffer pixels
movdqa xmm6, xmm4 // save the pixel values
psubsw xmm4, xmm3 // subtracted pixel values
pmullw xmm4, xmm4 // square xmm4
movd xmm5, strength
psrlw xmm4, xmm5 // squared difference >> strength
pmullw xmm4, threes // 3 * modifier
movdqa xmm5, sixteens // 16s
psubusw xmm5, xmm4 // 16 - modifiers, saturating at 0
movdqa xmm4, xmm5 // save the modifiers
pmullw xmm4, xmm6 // multiplier values
paddusw xmm1, xmm4 // accumulator
paddusw xmm2, xmm5 // count
add esi, 8 // next history slot
dec ecx // one fewer slot left to visit
jnz next_frame
movdqa counts, xmm2
psrlw xmm2, 1 // divide count by 2 for rounding
paddusw xmm1, xmm2 // rounding added in
mov frameptr, esi // now points at the next group's history slots
movdqa sums, xmm1
}
for (i = 0; i < 8; i++)
{
/* presumably fixed_divide[c] is a 16.16 fixed-point reciprocal of c, so
 * this is a rounded sums/counts -- TODO confirm where the table is built */
int blurvalue = sums[i] * ppi->fixed_divide[counts[i]];
blurvalue >>= 16;
d[i] = blurvalue;
}
s += 8;
d += 8;
byte += 8;
}
while (byte < bytes);
}
++ppi->frame;
/* NOTE(review): only xmm registers are used in the asm above, so this emms
 * looks redundant here -- confirm before removing. */
__asm emms
}
/****************************************************************************
*
* ROUTINE : temp_filter_mmx
*
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
* unsigned char *s : Pointer to source frame.
* unsigned char *d : Pointer to destination frame.
* int bytes : Number of bytes to filter.
* int strength : Strength of filter to apply (used as a right-shift
* count on the squared pixel difference).
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : Performs a closeness-adjusted temporal blur, 4 pixels per
* iteration, using MMX (mm) registers.
*
* SPECIAL NOTES : Destination frame can be same as source frame.
* Both paths are do/while loops stepping by 4, so one group of
* 4 pixels is always processed; presumably bytes is a positive
* multiple of 4 -- TODO confirm against callers.
*
****************************************************************************/
void temp_filter_mmx
(
pre_proc_instance *ppi,
unsigned char *s,
unsigned char *d,
int bytes,
int strength
)
{
int byte = 0;
unsigned char *frameptr = ppi->frame_buffer;
/* Per-lane word constants used as memory operands of pmullw/movq below. */
__declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3};
__declspec(align(16)) unsigned short sixteens[] = {16, 16, 16, 16};
if (ppi->frame == 0)
{
/* First frame: seed all FRAMECOUNT history slots of every 4-pixel group
 * with the source pixels and pass the source through unfiltered. */
do
{
int i;
int frame = 0;
do
{
for (i = 0; i < 4; i++)
{
*frameptr = s[byte+i];
++frameptr;
}
++frame;
}
while (frame < FRAMECOUNT);
for (i = 0; i < 4; i++)
d[byte+i] = s[byte+i];
byte += 4;
}
while (byte < bytes);
}
else
{
int i;
/* History slot (0..FRAMECOUNT-1) that the current frame overwrites. */
int offset2 = (ppi->frame % FRAMECOUNT);
do
{
/* Only the first 4 entries are written (movq) and read on this path. */
__declspec(align(16)) unsigned short counts[8];
__declspec(align(16)) unsigned short sums[8];
__asm
{
mov eax, offset2
mov edi, s // source pixels
pxor mm1, mm1 // accumulator (weighted pixel sum)
pxor mm7, mm7 // zero, used to widen bytes to words
mov esi, frameptr // history slots for this group of 4 pixels
pxor mm2, mm2 // count (sum of weights)
movd mm3, DWORD PTR [edi]
movd DWORD PTR [esi+4*eax], mm3 // store current pixels into slot offset2
punpcklbw mm3, mm2 // mm3 source pixels as words (mm2 is zero here)
mov ecx, FRAMECOUNT
next_frame:
movd mm4, DWORD PTR [esi] // get frame buffer values
punpcklbw mm4, mm7 // mm4 frame buffer pixels
movq mm6, mm4 // save the pixel values
psubsw mm4, mm3 // subtracted pixel values
pmullw mm4, mm4 // square mm4
movd mm5, strength
psrlw mm4, mm5 // squared difference >> strength
pmullw mm4, threes // 3 * modifier
movq mm5, sixteens // 16s
psubusw mm5, mm4 // 16 - modifiers, saturating at 0
movq mm4, mm5 // save the modifiers
pmullw mm4, mm6 // multiplier values
paddusw mm1, mm4 // accumulator
paddusw mm2, mm5 // count
add esi, 4 // next history slot
dec ecx // one fewer slot left to visit
jnz next_frame
movq counts, mm2
psrlw mm2, 1 // divide count by 2 for rounding
paddusw mm1, mm2 // rounding added in
mov frameptr, esi // now points at the next group's history slots
movq sums, mm1
}
for (i = 0; i < 4; i++)
{
/* presumably fixed_divide[c] is a 16.16 fixed-point reciprocal of c, so
 * this is a rounded sums/counts -- TODO confirm where the table is built */
int blurvalue = sums[i] * ppi->fixed_divide[counts[i]];
blurvalue >>= 16;
d[i] = blurvalue;
}
s += 4;
d += 4;
byte += 4;
}
while (byte < bytes);
}
++ppi->frame;
/* Leave MMX state so subsequent x87 floating-point code works. */
__asm emms
}

View File

@@ -253,9 +253,10 @@ rq_zigzag_1c:
pop rbp
ret
;int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
; short *qcoeff_ptr,short *dequant_ptr,
; short *inv_scan_order, short *round_ptr,
; short *scan_mask, short *round_ptr,
; short *quant_ptr, short *dqcoeff_ptr);
global sym(vp8_fast_quantize_b_impl_sse2)
sym(vp8_fast_quantize_b_impl_sse2):
@@ -264,18 +265,32 @@ sym(vp8_fast_quantize_b_impl_sse2):
SHADOW_ARGS_TO_STACK 7
push rsi
push rdi
push rbx
; end prolog
ALIGN_STACK 16, rax
%define save_xmm6 0
%define save_xmm7 16
%define vp8_fastquantizeb_stack_size save_xmm7 + 16
sub rsp, vp8_fastquantizeb_stack_size
movdqa XMMWORD PTR[rsp + save_xmm6], xmm6
movdqa XMMWORD PTR[rsp + save_xmm7], xmm7
mov rdx, arg(0) ;coeff_ptr
mov rcx, arg(2) ;dequant_ptr
mov rax, arg(3) ;scan_mask
mov rdi, arg(4) ;round_ptr
mov rsi, arg(5) ;quant_ptr
movdqa xmm0, XMMWORD PTR[rdx]
movdqa xmm4, XMMWORD PTR[rdx + 16]
movdqa xmm2, XMMWORD PTR[rdi] ;round lo
movdqa xmm3, XMMWORD PTR[rdi + 16] ;round hi
movdqa xmm6, XMMWORD PTR[rdi] ;round lo
movdqa xmm7, XMMWORD PTR[rdi + 16] ;round hi
movdqa xmm1, xmm0
movdqa xmm5, xmm4
@@ -288,8 +303,8 @@ sym(vp8_fast_quantize_b_impl_sse2):
psubw xmm1, xmm0 ;x = abs(z)
psubw xmm5, xmm4 ;x = abs(z)
paddw xmm1, xmm2
paddw xmm5, xmm3
paddw xmm1, xmm6
paddw xmm5, xmm7
pmulhw xmm1, XMMWORD PTR[rsi]
pmulhw xmm5, XMMWORD PTR[rsi + 16]
@@ -297,8 +312,8 @@ sym(vp8_fast_quantize_b_impl_sse2):
mov rdi, arg(1) ;qcoeff_ptr
mov rsi, arg(6) ;dqcoeff_ptr
movdqa xmm2, XMMWORD PTR[rcx]
movdqa xmm3, XMMWORD PTR[rcx + 16]
movdqa xmm6, XMMWORD PTR[rcx]
movdqa xmm7, XMMWORD PTR[rcx + 16]
pxor xmm1, xmm0
pxor xmm5, xmm4
@@ -308,47 +323,64 @@ sym(vp8_fast_quantize_b_impl_sse2):
movdqa XMMWORD PTR[rdi], xmm1
movdqa XMMWORD PTR[rdi + 16], xmm5
pmullw xmm2, xmm1
pmullw xmm3, xmm5
pmullw xmm6, xmm1
pmullw xmm7, xmm5
mov rdi, arg(3) ;inv_scan_order
movdqa xmm2, XMMWORD PTR[rax]
movdqa xmm3, XMMWORD PTR[rax+16];
; Start with 16
pxor xmm4, xmm4 ;clear all bits
pxor xmm4, xmm4 ;clear all bits
pcmpeqw xmm1, xmm4
pcmpeqw xmm5, xmm4
pcmpeqw xmm4, xmm4 ;set all bits
pcmpeqw xmm4, xmm4 ;set all bits
pxor xmm1, xmm4
pxor xmm5, xmm4
pand xmm1, XMMWORD PTR[rdi]
pand xmm5, XMMWORD PTR[rdi+16]
psrlw xmm1, 15
psrlw xmm5, 15
pmaxsw xmm1, xmm5
pmaddwd xmm1, xmm2
pmaddwd xmm5, xmm3
; now down to 8
pshufd xmm5, xmm1, 00001110b
movq xmm2, xmm1
movq xmm3, xmm5
pmaxsw xmm1, xmm5
psrldq xmm1, 8
psrldq xmm5, 8
; only 4 left
pshuflw xmm5, xmm1, 00001110b
paddd xmm1, xmm5
paddd xmm2, xmm3
pmaxsw xmm1, xmm5
paddd xmm1, xmm2
movq xmm5, xmm1
; okay, just 2!
pshuflw xmm5, xmm1, 00000001b
psrldq xmm1, 4
paddd xmm5, xmm1
pmaxsw xmm1, xmm5
movq rcx, xmm5
and rcx, 0xffff
movd rax, xmm1
and rax, 0xff
xor rdx, rdx
sub rdx, rcx
movdqa XMMWORD PTR[rsi], xmm2 ;store dqcoeff
movdqa XMMWORD PTR[rsi + 16], xmm3 ;store dqcoeff
bsr rax, rcx
inc rax
sar rdx, 31
and rax, rdx
movdqa XMMWORD PTR[rsi], xmm6 ;store dqcoeff
movdqa XMMWORD PTR[rsi + 16], xmm7 ;store dqcoeff
movdqa xmm6, XMMWORD PTR[rsp + save_xmm6]
movdqa xmm7, XMMWORD PTR[rsp + save_xmm7]
add rsp, vp8_fastquantizeb_stack_size
pop rsp
; begin epilog
pop rbx
pop rdi
pop rsi
UNSHADOW_ARGS

View File

@@ -1,114 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
; short *qcoeff_ptr,short *dequant_ptr,
; short *round_ptr,
; short *quant_ptr, short *dqcoeff_ptr);
;
; Quantizes 16 coefficients (two 8-word vectors), stores the quantized and
; dequantized values, and returns in eax the end-of-block position: one past
; the last nonzero quantized coefficient in zz_shuf order, or 0 if all are
; zero. All six buffers are accessed with movdqa, so they must be 16-byte
; aligned.
global sym(vp8_fast_quantize_b_impl_ssse3)
sym(vp8_fast_quantize_b_impl_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
GET_GOT rbx
push rsi
push rdi
; end prolog
mov rdx, arg(0) ;coeff_ptr
mov rdi, arg(3) ;round_ptr
mov rsi, arg(4) ;quant_ptr
movdqa xmm0, [rdx]
movdqa xmm4, [rdx + 16]
movdqa xmm2, [rdi] ;round lo
movdqa xmm3, [rdi + 16] ;round hi
movdqa xmm1, xmm0
movdqa xmm5, xmm4
psraw xmm0, 15 ;sign of z (aka sz)
psraw xmm4, 15 ;sign of z (aka sz)
pabsw xmm1, xmm1 ;x = abs(z)
pabsw xmm5, xmm5 ;x = abs(z)
paddw xmm1, xmm2 ;x + round
paddw xmm5, xmm3 ;x + round
pmulhw xmm1, [rsi] ;y = ((x + round) * quant) >> 16
pmulhw xmm5, [rsi + 16] ;y = ((x + round) * quant) >> 16
mov rdi, arg(1) ;qcoeff_ptr
mov rcx, arg(2) ;dequant_ptr
mov rsi, arg(5) ;dqcoeff_ptr
pxor xmm1, xmm0 ;restore the sign: (y ^ sz) - sz
pxor xmm5, xmm4
psubw xmm1, xmm0
psubw xmm5, xmm4
movdqa [rdi], xmm1 ;store qcoeff
movdqa [rdi + 16], xmm5 ;store qcoeff
movdqa xmm2, [rcx]
movdqa xmm3, [rcx + 16]
pxor xmm4, xmm4
pmullw xmm2, xmm1 ;dqcoeff = qcoeff * dequant
pmullw xmm3, xmm5 ;dqcoeff = qcoeff * dequant
pcmpeqw xmm1, xmm4 ;0xFFFF where qcoeff == 0 (zero mask, inverted below)
pcmpeqw xmm5, xmm4 ;0xFFFF where qcoeff == 0 (zero mask, inverted below)
packsswb xmm1, xmm5 ;narrow the 16 word masks to 16 byte masks
pshufb xmm1, [ GLOBAL(zz_shuf)] ;reorder the byte masks into zz_shuf order
pmovmskb edx, xmm1 ;bit i set => coefficient at scan position i is zero
; Scalar form of the eob search, kept for reference (replaced by bsr below):
; xor ecx, ecx
; mov eax, -1
;find_eob_loop:
; shr edx, 1
; jc fq_skip
; mov eax, ecx
;fq_skip:
; inc ecx
; cmp ecx, 16
; jne find_eob_loop
xor rdi, rdi
mov eax, -1
xor dx, ax ;flip the bits for bsr
bsr eax, edx ;index of the last nonzero coefficient (undefined if edx == 0)
movdqa [rsi], xmm2 ;store dqcoeff
movdqa [rsi + 16], xmm3 ;store dqcoeff
sub edi, edx ;check for all zeros in bit mask
sar edi, 31 ;0 or -1
add eax, 1
and eax, edi ;if the bit mask was all zero,
;then eob = 0
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
; pshufb control: output byte i takes input byte zz_shuf[i], i.e. the
; coefficient index visited at scan position i.
zz_shuf:
db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15

File diff suppressed because it is too large Load Diff

View File

@@ -1,353 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
; PROCESS_16X2X8 first
; Accumulates, for two rows of a 16-pixel-wide block, the SADs of the source
; row (rsi) against the reference row (rdi) at 8 horizontal offsets 0..7.
; The eight 16-bit sums live in xmm1. When the argument is nonzero the first
; row initializes xmm1; otherwise it is added to the existing xmm1.
; Uses rax = src stride, rdx = ref stride; advances rsi and rdi by two rows.
; Reads 24 reference bytes per row; clobbers xmm0, xmm2-xmm5.
%macro PROCESS_16X2X8 1
%if %1
movdqa xmm0, XMMWORD PTR [rsi] ; 16 source pixels (aligned load)
movq xmm1, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
movq xmm2, MMWORD PTR [rdi+16]
punpcklqdq xmm1, xmm3 ; xmm1 = ref[0..15]
punpcklqdq xmm3, xmm2 ; xmm3 = ref[8..23]
movdqa xmm2, xmm1
mpsadbw xmm1, xmm0, 0x0 ; src[0..3] vs ref windows starting at 0..7
mpsadbw xmm2, xmm0, 0x5 ; src[4..7] vs ref windows starting at 4..11
psrldq xmm0, 8 ; bring src[8..15] into the low half
movdqa xmm4, xmm3
mpsadbw xmm3, xmm0, 0x0 ; src[8..11] vs ref windows starting at 8..15
mpsadbw xmm4, xmm0, 0x5 ; src[12..15] vs ref windows starting at 12..19
paddw xmm1, xmm2
paddw xmm1, xmm3
paddw xmm1, xmm4
%else
movdqa xmm0, XMMWORD PTR [rsi]
movq xmm5, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
movq xmm2, MMWORD PTR [rdi+16]
punpcklqdq xmm5, xmm3
punpcklqdq xmm3, xmm2
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
psrldq xmm0, 8
movdqa xmm4, xmm3
mpsadbw xmm3, xmm0, 0x0
mpsadbw xmm4, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm5, xmm3
paddw xmm5, xmm4
paddw xmm1, xmm5 ; add this row to the running totals
%endif
; second row of the pair
movdqa xmm0, XMMWORD PTR [rsi + rax]
movq xmm5, MMWORD PTR [rdi+ rdx]
movq xmm3, MMWORD PTR [rdi+ rdx+8]
movq xmm2, MMWORD PTR [rdi+ rdx+16]
punpcklqdq xmm5, xmm3
punpcklqdq xmm3, xmm2
lea rsi, [rsi+rax*2] ; advance src by two rows
lea rdi, [rdi+rdx*2] ; advance ref by two rows
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
psrldq xmm0, 8
movdqa xmm4, xmm3
mpsadbw xmm3, xmm0, 0x0
mpsadbw xmm4, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm5, xmm3
paddw xmm5, xmm4
paddw xmm1, xmm5
%endmacro
; PROCESS_8X2X8 first
; Same as the 16-wide variant but for an 8-pixel-wide block: accumulates in
; xmm1 the SADs of two source rows against the reference at 8 horizontal
; offsets 0..7. A nonzero argument makes the first row initialize xmm1.
; Uses rax = src stride, rdx = ref stride; advances rsi and rdi by two rows.
; Reads 16 reference bytes per row; clobbers xmm0, xmm2, xmm3, xmm5.
%macro PROCESS_8X2X8 1
%if %1
movq xmm0, MMWORD PTR [rsi] ; 8 source pixels
movq xmm1, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm1, xmm3 ; xmm1 = ref[0..15]
movdqa xmm2, xmm1
mpsadbw xmm1, xmm0, 0x0 ; src[0..3] vs ref windows starting at 0..7
mpsadbw xmm2, xmm0, 0x5 ; src[4..7] vs ref windows starting at 4..11
paddw xmm1, xmm2
%else
movq xmm0, MMWORD PTR [rsi]
movq xmm5, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm5, xmm3
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm1, xmm5 ; add this row to the running totals
%endif
; second row of the pair
movq xmm0, MMWORD PTR [rsi + rax]
movq xmm5, MMWORD PTR [rdi+ rdx]
movq xmm3, MMWORD PTR [rdi+ rdx+8]
punpcklqdq xmm5, xmm3
lea rsi, [rsi+rax*2] ; advance src by two rows
lea rdi, [rdi+rdx*2] ; advance ref by two rows
movdqa xmm2, xmm5
mpsadbw xmm5, xmm0, 0x0
mpsadbw xmm2, xmm0, 0x5
paddw xmm5, xmm2
paddw xmm1, xmm5
%endmacro
; PROCESS_4X2X8 first
; 4-pixel-wide variant: accumulates in xmm1 the SADs of two source rows
; against the reference at 8 horizontal offsets 0..7. One mpsadbw per row
; suffices because a source row is a single 4-byte group. A nonzero argument
; makes the first row initialize xmm1.
; Uses rax = src stride, rdx = ref stride; advances rsi and rdi by two rows.
; Reads 16 reference bytes per row; clobbers xmm0, xmm3, xmm5.
%macro PROCESS_4X2X8 1
%if %1
movd xmm0, [rsi] ; 4 source pixels
movq xmm1, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm1, xmm3 ; xmm1 = ref[0..15]
mpsadbw xmm1, xmm0, 0x0 ; src[0..3] vs ref windows starting at 0..7
%else
movd xmm0, [rsi]
movq xmm5, MMWORD PTR [rdi]
movq xmm3, MMWORD PTR [rdi+8]
punpcklqdq xmm5, xmm3
mpsadbw xmm5, xmm0, 0x0
paddw xmm1, xmm5 ; add this row to the running totals
%endif
; second row of the pair
movd xmm0, [rsi + rax]
movq xmm5, MMWORD PTR [rdi+ rdx]
movq xmm3, MMWORD PTR [rdi+ rdx+8]
punpcklqdq xmm5, xmm3
lea rsi, [rsi+rax*2] ; advance src by two rows
lea rdi, [rdi+rdx*2] ; advance ref by two rows
mpsadbw xmm5, xmm0, 0x0
paddw xmm1, xmm5
%endmacro
;void vp8_sad16x16x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array);
;
; Writes to sad_array[0..7] the SADs of a 16x16 source block against the
; reference at horizontal offsets 0..7 (8 macro calls x 2 rows = 16 rows).
; Source rows are read with movdqa, so src_ptr rows must be 16-byte aligned.
global sym(vp8_sad16x16x8_sse4)
sym(vp8_sad16x16x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_16X2X8 1
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
mov rdi, arg(4) ;Results
movdqu XMMWORD PTR [rdi], xmm1 ; unaligned store of the 8 word sums
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad16x8x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
;
; Writes to sad_array[0..7] the SADs of a 16-wide, 8-row source block against
; the reference at horizontal offsets 0..7 (4 macro calls x 2 rows).
; Source rows are read with movdqa, so src_ptr rows must be 16-byte aligned.
global sym(vp8_sad16x8x8_sse4)
sym(vp8_sad16x8x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_16X2X8 1
PROCESS_16X2X8 0
PROCESS_16X2X8 0
PROCESS_16X2X8 0
mov rdi, arg(4) ;Results
movdqu XMMWORD PTR [rdi], xmm1 ; unaligned store of the 8 word sums
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad8x8x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
;
; Writes to sad_array[0..7] the SADs of an 8-wide, 8-row source block against
; the reference at horizontal offsets 0..7 (4 macro calls x 2 rows).
global sym(vp8_sad8x8x8_sse4)
sym(vp8_sad8x8x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_8X2X8 1
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
mov rdi, arg(4) ;Results
movdqu XMMWORD PTR [rdi], xmm1 ; unaligned store of the 8 word sums
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad8x16x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
;
; Writes to sad_array[0..7] the SADs of an 8-wide, 16-row source block
; against the reference at horizontal offsets 0..7 (8 macro calls x 2 rows).
global sym(vp8_sad8x16x8_sse4)
sym(vp8_sad8x16x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_8X2X8 1
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
PROCESS_8X2X8 0
mov rdi, arg(4) ;Results
movdqu XMMWORD PTR [rdi], xmm1 ; unaligned store of the 8 word sums
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_sad4x4x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array
;);
;
; Writes to sad_array[0..7] the SADs of a 4x4 source block against the
; reference at horizontal offsets 0..7 (2 macro calls x 2 rows).
global sym(vp8_sad4x4x8_sse4)
sym(vp8_sad4x4x8_sse4):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;ref_ptr
movsxd rax, dword ptr arg(1) ;src_stride
movsxd rdx, dword ptr arg(3) ;ref_stride
PROCESS_4X2X8 1
PROCESS_4X2X8 0
mov rdi, arg(4) ;Results
movdqu XMMWORD PTR [rdi], xmm1 ; unaligned store of the 8 word sums
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret

View File

@@ -297,31 +297,4 @@ extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
#endif
#endif
#if HAVE_SSE4_1
extern prototype_sad_multi_same_address_1(vp8_sad16x16x8_sse4);
extern prototype_sad_multi_same_address_1(vp8_sad16x8x8_sse4);
extern prototype_sad_multi_same_address_1(vp8_sad8x16x8_sse4);
extern prototype_sad_multi_same_address_1(vp8_sad8x8x8_sse4);
extern prototype_sad_multi_same_address_1(vp8_sad4x4x8_sse4);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_variance_sad16x16x8
#define vp8_variance_sad16x16x8 vp8_sad16x16x8_sse4
#undef vp8_variance_sad16x8x8
#define vp8_variance_sad16x8x8 vp8_sad16x8x8_sse4
#undef vp8_variance_sad8x16x8
#define vp8_variance_sad8x16x8 vp8_sad8x16x8_sse4
#undef vp8_variance_sad8x8x8
#define vp8_variance_sad8x8x8 vp8_sad8x8x8_sse4
#undef vp8_variance_sad4x4x8
#define vp8_variance_sad4x4x8 vp8_sad4x4x8_sse4
#endif
#endif
#endif

View File

@@ -18,10 +18,11 @@
#if HAVE_MMX
void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
{
vp8_short_fdct4x4_mmx(input, output, pitch);
vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
vp8_short_fdct4x4_c(input, output, pitch);
vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
}
int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
short *qcoeff_ptr, short *dequant_ptr,
short *scan_mask, short *round_ptr,
@@ -32,7 +33,7 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
short *coeff_ptr = b->coeff;
short *zbin_ptr = b->zbin;
short *round_ptr = b->round;
short *quant_ptr = b->quant_fast;
short *quant_ptr = b->quant;
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
short *dequant_ptr = d->dequant;
@@ -81,16 +82,22 @@ void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
#endif
#if HAVE_SSE2
void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
{
vp8_short_fdct4x4_sse2(input, output, pitch);
vp8_short_fdct4x4_sse2(input + 4, output + 16, pitch);
}
int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
short *qcoeff_ptr, short *dequant_ptr,
const short *inv_scan_order, short *round_ptr,
short *scan_mask, short *round_ptr,
short *quant_ptr, short *dqcoeff_ptr);
void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
{
short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
short *coeff_ptr = b->coeff;
short *round_ptr = b->round;
short *quant_ptr = b->quant_fast;
short *quant_ptr = b->quant;
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
short *dequant_ptr = d->dequant;
@@ -99,7 +106,8 @@ void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
coeff_ptr,
qcoeff_ptr,
dequant_ptr,
vp8_default_inv_zig_zag,
scan_mask,
round_ptr,
quant_ptr,
dqcoeff_ptr
@@ -171,25 +179,6 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
#endif
#if HAVE_SSSE3
int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
short *qcoeff_ptr, short *dequant_ptr,
short *round_ptr,
short *quant_ptr, short *dqcoeff_ptr);
void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
{
d->eob = vp8_fast_quantize_b_impl_ssse3(
b->coeff,
d->qcoeff,
d->dequant,
b->round,
b->quant_fast,
d->dqcoeff
);
}
#endif
void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
{
#if CONFIG_RUNTIME_CPU_DETECT
@@ -199,7 +188,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
int wmt_enabled = flags & HAS_SSE2;
int SSE3Enabled = flags & HAS_SSE3;
int SSSE3Enabled = flags & HAS_SSSE3;
int SSE4_1Enabled = flags & HAS_SSE4_1;
/* Note:
*
@@ -210,6 +198,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
/* Override default functions with fastest ones for this CPU. */
#if HAVE_MMX
if (mmx_enabled)
{
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx;
@@ -241,11 +230,18 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.get8x8var = vp8_get8x8var_mmx;
cpi->rtcd.variance.get16x16var = vp8_get16x16var_mmx;
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx;
#if 0 // new fdct
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx;
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx;
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx;
#else
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c;
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_c;
#endif
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
@@ -258,9 +254,10 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
/*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/
}
#endif
#endif
#if HAVE_SSE2
if (wmt_enabled)
{
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt;
@@ -310,9 +307,10 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2;
}
#endif
#endif
#if HAVE_SSE3
if (SSE3Enabled)
{
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3;
@@ -330,30 +328,16 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3;
cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4;
}
#endif
#endif
#if HAVE_SSSE3
if (SSSE3Enabled)
{
cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
}
#endif
#if HAVE_SSE4_1
if (SSE4_1Enabled)
{
cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4;
cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4;
cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4;
cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4;
cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4;
cpi->rtcd.search.full_search = vp8_full_search_sadx8;
}
#endif
#endif
}

View File

@@ -17,6 +17,7 @@ VP8_COMMON_SRCS-yes += common/type_aliases.h
VP8_COMMON_SRCS-yes += common/pragmas.h
CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
VP8_COMMON_SRCS-yes += common/preproc.h
VP8_COMMON_SRCS-yes += common/vpxerrors.h
CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common

View File

@@ -37,7 +37,6 @@ struct vp8_extracfg
unsigned int arnr_max_frames; /* alt_ref Noise Reduction Max Frame Count */
unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */
unsigned int arnr_type; /* alt_ref filter type */
vp8e_tuning tuning;
};
@@ -68,7 +67,6 @@ static const struct extraconfig_map extracfg_map[] =
0, /* arnr_max_frames */
3, /* arnr_strength */
3, /* arnr_type*/
0, /* tuning*/
}
}
};
@@ -106,7 +104,6 @@ update_error_state(vpx_codec_alg_priv_t *ctx,
}
#undef ERROR
#define ERROR(str) do {\
ctx->base.err_detail = str;\
return VPX_CODEC_INVALID_PARAM;\
@@ -135,8 +132,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
const vpx_codec_enc_cfg_t *cfg,
const struct vp8_extracfg *vp8_cfg)
{
RANGE_CHECK(cfg, g_w, 1, 16384);
RANGE_CHECK(cfg, g_h, 1, 16384);
RANGE_CHECK(cfg, g_w, 2, 16384);
RANGE_CHECK(cfg, g_h, 2, 16384);
RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
RANGE_CHECK_HI(cfg, g_profile, 3);
@@ -338,7 +335,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
oxcf->arnr_strength = vp8_cfg.arnr_strength;
oxcf->arnr_type = vp8_cfg.arnr_type;
oxcf->tuning = vp8_cfg.tuning;
/*
printf("Current VP8 Settings: \n");
@@ -452,7 +448,6 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx,
MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames);
MAP(VP8E_SET_ARNR_STRENGTH , xcfg.arnr_strength);
MAP(VP8E_SET_ARNR_TYPE , xcfg.arnr_type);
MAP(VP8E_SET_TUNING, xcfg.tuning);
}
@@ -865,16 +860,8 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
{
YV12_BUFFER_CONFIG sd;
vp8_ppflags_t flags = {0};
if (ctx->preview_ppcfg.post_proc_flag)
{
flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag;
flags.deblocking_level = ctx->preview_ppcfg.deblocking_level;
flags.noise_level = ctx->preview_ppcfg.noise_level;
}
if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, &flags))
if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, ctx->preview_ppcfg.deblocking_level, ctx->preview_ppcfg.noise_level, ctx->preview_ppcfg.post_proc_flag))
{
/*
@@ -1033,7 +1020,6 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] =
{VP8E_SET_ARNR_MAXFRAMES, set_param},
{VP8E_SET_ARNR_STRENGTH , set_param},
{VP8E_SET_ARNR_TYPE , set_param},
{VP8E_SET_TUNING, set_param},
{ -1, NULL},
};

View File

@@ -65,19 +65,12 @@ struct vpx_codec_alg_priv
vpx_codec_priv_t base;
vpx_codec_mmap_t mmaps[NELEMENTS(vp8_mem_req_segs)-1];
vpx_codec_dec_cfg_t cfg;
vp8_stream_info_t si;
vp8_stream_info_t si;
int defer_alloc;
int decoder_init;
VP8D_PTR pbi;
int postproc_cfg_set;
vp8_postproc_cfg_t postproc_cfg;
#if CONFIG_POSTPROC_VISUALIZER
unsigned int dbg_postproc_flag;
int dbg_color_ref_frame_flag;
int dbg_color_mb_modes_flag;
int dbg_color_b_modes_flag;
int dbg_display_mv_flag;
#endif
vpx_image_t img;
int img_setup;
int img_avail;
@@ -423,27 +416,15 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
{
YV12_BUFFER_CONFIG sd;
INT64 time_stamp = 0, time_end_stamp = 0;
vp8_ppflags_t flags = {0};
int ppflag = 0;
int ppdeblocking = 0;
int ppnoise = 0;
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
{
flags.post_proc_flag= ctx->postproc_cfg.post_proc_flag
#if CONFIG_POSTPROC_VISUALIZER
| ((ctx->dbg_color_ref_frame_flag != 0) ? VP8D_DEBUG_CLR_FRM_REF_BLKS : 0)
| ((ctx->dbg_color_mb_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0)
| ((ctx->dbg_color_b_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0)
| ((ctx->dbg_display_mv_flag != 0) ? VP8D_DEBUG_DRAW_MV : 0)
#endif
;
flags.deblocking_level = ctx->postproc_cfg.deblocking_level;
flags.noise_level = ctx->postproc_cfg.noise_level;
#if CONFIG_POSTPROC_VISUALIZER
flags.display_ref_frame_flag= ctx->dbg_color_ref_frame_flag;
flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag;
flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag;
flags.display_mv_flag = ctx->dbg_display_mv_flag;
#endif
ppflag = ctx->postproc_cfg.post_proc_flag;
ppdeblocking = ctx->postproc_cfg.deblocking_level;
ppnoise = ctx->postproc_cfg.noise_level;
}
if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline))
@@ -452,7 +433,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
res = update_error_state(ctx, &pbi->common.error);
}
if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags))
if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, ppdeblocking, ppnoise, ppflag))
{
/* Align width/height */
unsigned int a_w = (sd.y_width + 15) & ~15;
@@ -466,7 +447,6 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
vpx_img_set_rect(&ctx->img,
VP8BORDERINPIXELS, VP8BORDERINPIXELS,
sd.y_width, sd.y_height);
ctx->img.user_priv = user_priv;
ctx->img_avail = 1;
}
@@ -666,59 +646,12 @@ static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx,
#endif
}
static vpx_codec_err_t vp8_set_dbg_options(vpx_codec_alg_priv_t *ctx,
int ctrl_id,
va_list args)
{
#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC
int data = va_arg(args, int);
#define MAP(id, var) case id: var = data; break;
switch (ctrl_id)
{
MAP (VP8_SET_DBG_COLOR_REF_FRAME, ctx->dbg_color_ref_frame_flag);
MAP (VP8_SET_DBG_COLOR_MB_MODES, ctx->dbg_color_mb_modes_flag);
MAP (VP8_SET_DBG_COLOR_B_MODES, ctx->dbg_color_b_modes_flag);
MAP (VP8_SET_DBG_DISPLAY_MV, ctx->dbg_display_mv_flag);
}
return VPX_CODEC_OK;
#else
return VPX_CODEC_INCAPABLE;
#endif
}
static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
int ctrl_id,
va_list args)
{
int *update_info = va_arg(args, int *);
VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi;
if (update_info)
{
*update_info = pbi->common.refresh_alt_ref_frame * (int) VP8_ALTR_FRAME
+ pbi->common.refresh_golden_frame * (int) VP8_GOLD_FRAME
+ pbi->common.refresh_last_frame * (int) VP8_LAST_FRAME;
return VPX_CODEC_OK;
}
else
return VPX_CODEC_INVALID_PARAM;
}
vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] =
{
{VP8_SET_REFERENCE, vp8_set_reference},
{VP8_COPY_REFERENCE, vp8_get_reference},
{VP8_SET_POSTPROC, vp8_set_postproc},
{VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_options},
{VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_options},
{VP8_SET_DBG_COLOR_B_MODES, vp8_set_dbg_options},
{VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_options},
{VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates},
{VP8_SET_REFERENCE, vp8_set_reference},
{VP8_COPY_REFERENCE, vp8_get_reference},
{VP8_SET_POSTPROC, vp8_set_postproc},
{ -1, NULL},
};

View File

@@ -109,8 +109,6 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm

View File

@@ -38,13 +38,9 @@
*/
enum vp8_dec_control_id
{
VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */
VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */
VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */
VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */
VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */
VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
VP8_COMMON_CTRL_ID_MAX
};
@@ -54,14 +50,10 @@ enum vp8_dec_control_id
*/
enum vp8_postproc_level
{
VP8_NOFILTERING = 0,
VP8_DEBLOCK = 1<<0,
VP8_DEMACROBLOCK = 1<<1,
VP8_ADDNOISE = 1<<2,
VP8_DEBUG_TXT_FRAME_INFO = 1<<3, /**< print frame information */
VP8_DEBUG_TXT_MBLK_MODES = 1<<4, /**< print macro block modes over each macro block */
VP8_DEBUG_TXT_DC_DIFF = 1<<5, /**< print dc diff for each macro block */
VP8_DEBUG_TXT_RATE_INFO = 1<<6, /**< print video rate info (encoder only) */
VP8_NOFILTERING = 0,
VP8_DEBLOCK = 1,
VP8_DEMACROBLOCK = 2,
VP8_ADDNOISE = 4
};
/*!\brief post process flags
@@ -73,9 +65,9 @@ enum vp8_postproc_level
typedef struct vp8_postproc_cfg
{
int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */
int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */
int noise_level; /**< the strength of additive noise, valid range [0, 16] */
int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */
int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */
int noise_level; /**< the strength of additive noise, valid range [0, 16] */
} vp8_postproc_cfg_t;
/*!\brief reference frame type
@@ -103,16 +95,12 @@ typedef struct vpx_ref_frame
/*!\brief vp8 decoder control function parameter type
*
* defines the data type for each of VP8 decoder control function requires
* defines the data type that each VP8 decoder control function requires
*/
VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *)
VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *)
VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *)
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int)
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int)
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int)
VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int)
/*! @} - end defgroup vp8 */

View File

@@ -140,8 +140,7 @@ enum vp8e_enc_control_id
VP8E_SET_ARNR_MAXFRAMES, /**< control function to set the max number of frames blurred creating arf*/
VP8E_SET_ARNR_STRENGTH , /**< control function to set the filter strength for the arf */
VP8E_SET_ARNR_TYPE , /**< control function to set the type of filter to use for the arf*/
VP8E_SET_TUNING, /**< control function to set visual tuning */
};
} ;
/*!\brief vpx 1-D scaling mode
*
@@ -225,18 +224,6 @@ typedef enum
} vp8e_token_partitions;
/*!\brief VP8 model tuning parameters
*
* Changes the encoder to tune for certain types of input material.
*
*/
typedef enum
{
VP8_TUNE_PSNR,
VP8_TUNE_SSIM
} vp8e_tuning;
/*!\brief VP8 encoder control function parameter type
*
* Defines the data types that VP8E control functions take. Note that
@@ -266,7 +253,7 @@ VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, vp8e_token_partitions)
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH , unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE , unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, vp8e_tuning)
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *)
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *)

View File

@@ -36,30 +36,6 @@ extern vpx_codec_iface_t* vpx_codec_vp8_dx(void);
#include "vp8.h"
/*!\brief VP8 decoder control functions
*
* The set of macros define the control functions of VP8 decoder interface
*/
enum vp8d_dec_control_id
{
VP8_DECODER_CTRL_ID_START = 256,
VP8D_GET_LAST_REF_UPDATES, /**< control function to get info on which reference frames were updated
by the last decode */
VP8_DECODER_CTRL_ID_MAX
} ;
/*!\brief VP8 decoder control function parameter type
*
* Defines the data types that VP8D control functions take. Note that
* additional common controls are defined in vp8.h
*
*/
VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *)
/*! @} - end defgroup vp8_decoder */

View File

@@ -74,7 +74,6 @@ void __cpuid(int CPUInfo[4], int info_type);
#define HAS_SSE2 0x04
#define HAS_SSE3 0x08
#define HAS_SSSE3 0x10
#define HAS_SSE4_1 0x20
#ifndef BIT
#define BIT(n) (1<<n)
#endif
@@ -118,8 +117,6 @@ x86_simd_caps(void)
if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
return flags & mask;
}

View File

@@ -35,7 +35,6 @@
#if CONFIG_MD5
#include "md5_utils.h"
#endif
#include "tools_common.h"
#include "nestegg/include/nestegg/nestegg.h"
#ifndef PATH_MAX
@@ -108,19 +107,11 @@ static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level",
"Enable VP8 demacroblocking, w/ level");
static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1,
"Enable VP8 visible debug info");
static const arg_def_t pp_disp_ref_frame = ARG_DEF(NULL, "pp-dbg-ref-frame", 1,
"Display only selected reference frame per macro block");
static const arg_def_t pp_disp_mb_modes = ARG_DEF(NULL, "pp-dbg-mb-modes", 1,
"Display only selected macro block modes");
static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1,
"Display only selected block modes");
static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1,
"Draw only selected motion vectors");
static const arg_def_t *vp8_pp_args[] =
{
&addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
&pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs,
NULL
};
#endif
@@ -323,8 +314,7 @@ void *out_open(const char *out_fn, int do_md5)
}
else
{
FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb")
: set_binary_mode(stdout);
FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb") : stdout;
if (!outfile)
{
@@ -442,8 +432,6 @@ unsigned int file_is_raw(FILE *infile,
int is_raw = 0;
vpx_codec_stream_info_t si;
si.sz = sizeof(si);
if (fread(buf, 1, 32, infile) == 32)
{
int i;
@@ -552,7 +540,6 @@ webm_guess_framerate(struct input_ctx *input,
*fps_den = tstamp / 1000;
return 0;
fail:
nestegg_destroy(input->nestegg_ctx);
input->nestegg_ctx = NULL;
rewind(input->infile);
return 1;
@@ -715,10 +702,6 @@ int main(int argc, const char **argv_)
vpx_codec_dec_cfg_t cfg = {0};
#if CONFIG_VP8_DECODER
vp8_postproc_cfg_t vp8_pp_cfg = {0};
int vp8_dbg_color_ref_frame = 0;
int vp8_dbg_color_mb_modes = 0;
int vp8_dbg_color_b_modes = 0;
int vp8_dbg_display_mv = 0;
#endif
struct input_ctx input = {0};
@@ -804,42 +787,6 @@ int main(int argc, const char **argv_)
if (level)
vp8_pp_cfg.post_proc_flag |= level;
}
else if (arg_match(&arg, &pp_disp_ref_frame, argi))
{
unsigned int flags = arg_parse_int(&arg);
if (flags)
{
postproc = 1;
vp8_dbg_color_ref_frame = flags;
}
}
else if (arg_match(&arg, &pp_disp_mb_modes, argi))
{
unsigned int flags = arg_parse_int(&arg);
if (flags)
{
postproc = 1;
vp8_dbg_color_mb_modes = flags;
}
}
else if (arg_match(&arg, &pp_disp_b_modes, argi))
{
unsigned int flags = arg_parse_int(&arg);
if (flags)
{
postproc = 1;
vp8_dbg_color_b_modes = flags;
}
}
else if (arg_match(&arg, &pp_disp_mvs, argi))
{
unsigned int flags = arg_parse_int(&arg);
if (flags)
{
postproc = 1;
vp8_dbg_display_mv = flags;
}
}
#endif
else
@@ -858,7 +805,7 @@ int main(int argc, const char **argv_)
usage_exit();
/* Open file */
infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin);
infile = strcmp(fn, "-") ? fopen(fn, "rb") : stdin;
if (!infile)
{
@@ -929,13 +876,7 @@ int main(int argc, const char **argv_)
}
if(input.kind == WEBM_FILE)
if(webm_guess_framerate(&input, &fps_den, &fps_num))
{
fprintf(stderr, "Failed to guess framerate -- error parsing "
"webm file?\n");
return EXIT_FAILURE;
}
webm_guess_framerate(&input, &fps_den, &fps_num);
/*Note: We can't output an aspect ratio here because IVF doesn't
store one, and neither does VP8.
@@ -979,33 +920,6 @@ int main(int argc, const char **argv_)
return EXIT_FAILURE;
}
if (vp8_dbg_color_ref_frame
&& vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame))
{
fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
if (vp8_dbg_color_mb_modes
&& vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes))
{
fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
if (vp8_dbg_color_b_modes
&& vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes))
{
fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
if (vp8_dbg_display_mv
&& vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv))
{
fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
#endif
/* Decode file */

107
vpxenc.c Executable file → Normal file
View File

@@ -35,11 +35,9 @@
#include "vpx/vp8cx.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/vpx_timer.h"
#include "tools_common.h"
#include "y4minput.h"
#include "libmkv/EbmlWriter.h"
#include "libmkv/EbmlIDs.h"
#include "experimental.h"
/* Need special handling of these functions on Windows */
#if defined(_MSC_VER)
@@ -187,11 +185,11 @@ int stats_open_mem(stats_io_t *stats, int pass)
}
void stats_close(stats_io_t *stats, int last_pass)
void stats_close(stats_io_t *stats)
{
if (stats->file)
{
if (stats->pass == last_pass)
if (stats->pass == 1)
{
#if 0
#elif USE_POSIX_MMAP
@@ -206,7 +204,7 @@ void stats_close(stats_io_t *stats, int last_pass)
}
else
{
if (stats->pass == last_pass)
if (stats->pass == 1)
free(stats->buf.buf);
}
}
@@ -252,8 +250,7 @@ enum video_file_type
struct detect_buffer {
char buf[4];
size_t buf_read;
size_t position;
int valid;
};
@@ -307,21 +304,14 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
for (r = 0; r < h; r++)
{
size_t needed = w;
size_t buf_position = 0;
const size_t left = detect->buf_read - detect->position;
if (left > 0)
if (detect->valid)
{
const size_t more = (left < needed) ? left : needed;
memcpy(ptr, detect->buf + detect->position, more);
buf_position = more;
needed -= more;
detect->position += more;
}
if (needed > 0)
{
shortread |= (fread(ptr + buf_position, 1, needed, f) < needed);
memcpy(ptr, detect->buf, 4);
shortread |= fread(ptr+4, 1, w-4, f) < w-4;
detect->valid = 0;
}
else
shortread |= fread(ptr, 1, w, f) < w;
ptr += img->stride[plane];
}
@@ -348,12 +338,12 @@ unsigned int file_is_ivf(FILE *infile,
unsigned int *fourcc,
unsigned int *width,
unsigned int *height,
struct detect_buffer *detect)
char detect[4])
{
char raw_hdr[IVF_FILE_HDR_SZ];
int is_ivf = 0;
if(memcmp(detect->buf, "DKIF", 4) != 0)
if(memcmp(detect, "DKIF", 4) != 0)
return 0;
/* See write_ivf_file_header() for more documentation on the file header
@@ -377,7 +367,6 @@ unsigned int file_is_ivf(FILE *infile,
{
*width = mem_get_le16(raw_hdr + 12);
*height = mem_get_le16(raw_hdr + 14);
detect->position = 4;
}
return is_ivf;
@@ -445,7 +434,7 @@ struct EbmlGlobal
int debug;
FILE *stream;
int64_t last_pts_ms;
uint64_t last_pts_ms;
vpx_rational_t framerate;
/* These pointers are to the start of an element */
@@ -658,7 +647,7 @@ write_webm_block(EbmlGlobal *glob,
unsigned char track_number;
unsigned short block_timecode = 0;
unsigned char flags;
int64_t pts_ms;
uint64_t pts_ms;
int start_cluster = 0, is_keyframe;
/* Calculate the PTS of this frame in milliseconds */
@@ -989,32 +978,23 @@ static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1,
static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1,
"Enable automatic alt reference frames");
static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1,
"AltRef Max Frames");
"alt_ref Max Frames");
static const arg_def_t arnr_strength = ARG_DEF(NULL, "arnr-strength", 1,
"AltRef Strength");
"alt_ref Strength");
static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1,
"AltRef Type");
static const struct arg_enum_list tuning_enum[] = {
{"psnr", VP8_TUNE_PSNR},
{"ssim", VP8_TUNE_SSIM},
{NULL, 0}
};
static const arg_def_t tune_ssim = ARG_DEF_ENUM(NULL, "tune", 1,
"Material to favor", tuning_enum);
"alt_ref Type");
static const arg_def_t *vp8_args[] =
{
&cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
&token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
&tune_ssim, NULL
&token_parts, &arnr_maxframes, &arnr_strength, &arnr_type, NULL
};
static const int vp8_arg_ctrl_map[] =
{
VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
VP8E_SET_TOKEN_PARTITIONS,
VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE,
VP8E_SET_TUNING, 0
VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE, 0
};
#endif
@@ -1040,7 +1020,6 @@ static void usage_exit()
#if CONFIG_VP8_ENCODER
fprintf(stderr, "\nVP8 Specific Options:\n");
arg_show_usage(stdout, vp8_args);
xxx_show_usage(stdout);
#endif
fprintf(stderr, "\n"
"Included encoders:\n"
@@ -1094,7 +1073,6 @@ int main(int argc, const char **argv_)
int psnr_count = 0;
exec_name = argv_[0];
ebml.last_pts_ms = -1;
if (argc < 3)
usage_exit();
@@ -1175,7 +1153,6 @@ int main(int argc, const char **argv_)
out_fn = arg.val;
else if (arg_match(&arg, &debugmode, argi))
ebml.debug = 1;
else if (xxx_parse_arg(argi));
else
argj++;
}
@@ -1212,12 +1189,6 @@ int main(int argc, const char **argv_)
*/
cfg.g_timebase.den = 1000;
/* Never use the library's default resolution, require it be parsed
* from the file or set on the command line.
*/
cfg.g_w = 0;
cfg.g_h = 0;
/* Now parse the remainder of the parameters. */
for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
{
@@ -1329,7 +1300,7 @@ int main(int argc, const char **argv_)
if (arg_ctrl_cnt < ARG_CTRL_CNT_MAX)
{
arg_ctrls[arg_ctrl_cnt][0] = ctrl_args_map[i];
arg_ctrls[arg_ctrl_cnt][1] = arg_parse_enum_or_int(&arg);
arg_ctrls[arg_ctrl_cnt][1] = arg_parse_int(&arg);
arg_ctrl_cnt++;
}
}
@@ -1359,11 +1330,11 @@ int main(int argc, const char **argv_)
{
int frames_in = 0, frames_out = 0;
unsigned long nbytes = 0;
size_t detect_bytes;
struct detect_buffer detect;
/* Parse certain options from the input file, if possible */
infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb")
: set_binary_mode(stdin);
infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb") : stdin;
if (!infile)
{
@@ -1373,11 +1344,13 @@ int main(int argc, const char **argv_)
/* For RAW input sources, these bytes will be applied on the first frame
* in read_frame().
* We can always read 4 bytes because the minimum supported frame size
* is 2x2.
*/
detect.buf_read = fread(detect.buf, 1, 4, infile);
detect.position = 0;
detect_bytes = fread(detect.buf, 1, 4, infile);
detect.valid = 0;
if (detect.buf_read == 4 && file_is_y4m(infile, &y4m, detect.buf))
if (detect_bytes == 4 && file_is_y4m(infile, &y4m, detect.buf))
{
if (y4m_input_open(&y4m, infile, detect.buf, 4) >= 0)
{
@@ -1402,8 +1375,8 @@ int main(int argc, const char **argv_)
return EXIT_FAILURE;
}
}
else if (detect.buf_read == 4 &&
file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, &detect))
else if (detect_bytes == 4 &&
file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, detect.buf))
{
file_type = FILE_TYPE_IVF;
switch (fourcc)
@@ -1422,15 +1395,8 @@ int main(int argc, const char **argv_)
else
{
file_type = FILE_TYPE_RAW;
detect.valid = 1;
}
if(!cfg.g_w || !cfg.g_h)
{
fprintf(stderr, "Specify stream dimensions with --width (-w) "
" and --height (-h).\n");
return EXIT_FAILURE;
}
#define SHOW(field) fprintf(stderr, " %-28s = %d\n", #field, cfg.field)
if (verbose && pass == 0)
@@ -1483,8 +1449,7 @@ int main(int argc, const char **argv_)
cfg.g_w, cfg.g_h, 1);
}
outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb")
: set_binary_mode(stdout);
outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb") : stdout;
if (!outfile)
{
@@ -1562,7 +1527,7 @@ int main(int argc, const char **argv_)
vpx_codec_iter_t iter = NULL;
const vpx_codec_cx_pkt_t *pkt;
struct vpx_usec_timer timer;
int64_t frame_start, next_frame_start;
int64_t frame_start;
if (!arg_limit || frames_in < arg_limit)
{
@@ -1583,11 +1548,9 @@ int main(int argc, const char **argv_)
frame_start = (cfg.g_timebase.den * (int64_t)(frames_in - 1)
* arg_framerate.den) / cfg.g_timebase.num / arg_framerate.num;
next_frame_start = (cfg.g_timebase.den * (int64_t)(frames_in)
* arg_framerate.den)
/ cfg.g_timebase.num / arg_framerate.num;
vpx_codec_encode(&encoder, frame_avail ? &raw : NULL, frame_start,
next_frame_start - frame_start,
cfg.g_timebase.den * arg_framerate.den
/ cfg.g_timebase.num / arg_framerate.num,
0, arg_deadline);
vpx_usec_timer_mark(&timer);
cx_time += vpx_usec_timer_elapsed(&timer);
@@ -1695,7 +1658,7 @@ int main(int argc, const char **argv_)
}
fclose(outfile);
stats_close(&stats, arg_passes-1);
stats_close(&stats);
fprintf(stderr, "\n");
if (one_pass_only)