Compare commits
2 Commits
sandbox/jk
...
aylesbury
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
755e2a290b | ||
|
|
30ba8f2ae3 |
45
args.c
45
args.c
@@ -135,17 +135,6 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs)
|
||||
def->long_name, long_val);
|
||||
|
||||
fprintf(fp, " %-37s\t%s\n", option_text, def->desc);
|
||||
|
||||
if(def->enums)
|
||||
{
|
||||
const struct arg_enum_list *listptr;
|
||||
|
||||
fprintf(fp, " %-37s\t ", "");
|
||||
|
||||
for(listptr = def->enums; listptr->name; listptr++)
|
||||
fprintf(fp, "%s%s", listptr->name,
|
||||
listptr[1].name ? ", " : "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -229,37 +218,3 @@ struct vpx_rational arg_parse_rational(const struct arg *arg)
|
||||
|
||||
return rat;
|
||||
}
|
||||
|
||||
|
||||
int arg_parse_enum(const struct arg *arg)
|
||||
{
|
||||
const struct arg_enum_list *listptr;
|
||||
long int rawval;
|
||||
char *endptr;
|
||||
|
||||
/* First see if the value can be parsed as a raw value */
|
||||
rawval = strtol(arg->val, &endptr, 10);
|
||||
if (arg->val[0] != '\0' && endptr[0] == '\0')
|
||||
{
|
||||
/* Got a raw value, make sure it's valid */
|
||||
for(listptr = arg->def->enums; listptr->name; listptr++)
|
||||
if(listptr->val == rawval)
|
||||
return rawval;
|
||||
}
|
||||
|
||||
/* Next see if it can be parsed as a string */
|
||||
for(listptr = arg->def->enums; listptr->name; listptr++)
|
||||
if(!strcmp(arg->val, listptr->name))
|
||||
return listptr->val;
|
||||
|
||||
die("Option %s: Invalid value '%s'\n", arg->name, arg->val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int arg_parse_enum_or_int(const struct arg *arg)
|
||||
{
|
||||
if(arg->def->enums)
|
||||
return arg_parse_enum(arg);
|
||||
return arg_parse_int(arg);
|
||||
}
|
||||
|
||||
12
args.h
12
args.h
@@ -22,23 +22,14 @@ struct arg
|
||||
const struct arg_def *def;
|
||||
};
|
||||
|
||||
struct arg_enum_list
|
||||
{
|
||||
const char *name;
|
||||
int val;
|
||||
};
|
||||
#define ARG_ENUM_LIST_END {0}
|
||||
|
||||
typedef struct arg_def
|
||||
{
|
||||
const char *short_name;
|
||||
const char *long_name;
|
||||
int has_val;
|
||||
const char *desc;
|
||||
const struct arg_enum_list *enums;
|
||||
} arg_def_t;
|
||||
#define ARG_DEF(s,l,v,d) {s,l,v,d, NULL}
|
||||
#define ARG_DEF_ENUM(s,l,v,d,e) {s,l,v,d,e}
|
||||
#define ARG_DEF(s,l,v,d) {s,l,v,d}
|
||||
#define ARG_DEF_LIST_END {0}
|
||||
|
||||
struct arg arg_init(char **argv);
|
||||
@@ -50,5 +41,4 @@ char **argv_dup(int argc, const char **argv);
|
||||
unsigned int arg_parse_uint(const struct arg *arg);
|
||||
int arg_parse_int(const struct arg *arg);
|
||||
struct vpx_rational arg_parse_rational(const struct arg *arg);
|
||||
int arg_parse_enum_or_int(const struct arg *arg);
|
||||
#endif
|
||||
|
||||
@@ -547,10 +547,6 @@ process_common_toolchain() {
|
||||
tgt_isa=universal
|
||||
tgt_os=darwin9
|
||||
;;
|
||||
*darwin10*)
|
||||
tgt_isa=x86_64
|
||||
tgt_os=darwin10
|
||||
;;
|
||||
*mingw32*|*cygwin*)
|
||||
[ -z "$tgt_isa" ] && tgt_isa=x86
|
||||
tgt_os=win32
|
||||
@@ -610,12 +606,6 @@ process_common_toolchain() {
|
||||
add_ldflags "-isysroot /Developer/SDKs/MacOSX10.5.sdk"
|
||||
add_ldflags "-mmacosx-version-min=10.5"
|
||||
;;
|
||||
*-darwin10-*)
|
||||
add_cflags "-isysroot /Developer/SDKs/MacOSX10.6.sdk"
|
||||
add_cflags "-mmacosx-version-min=10.6"
|
||||
add_ldflags "-isysroot /Developer/SDKs/MacOSX10.6.sdk"
|
||||
add_ldflags "-mmacosx-version-min=10.6"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Handle Solaris variants. Solaris 10 needs -lposix4
|
||||
@@ -834,7 +824,6 @@ process_common_toolchain() {
|
||||
soft_enable sse2
|
||||
soft_enable sse3
|
||||
soft_enable ssse3
|
||||
soft_enable sse4_1
|
||||
|
||||
case ${tgt_os} in
|
||||
win*)
|
||||
@@ -890,7 +879,7 @@ process_common_toolchain() {
|
||||
case ${tgt_os} in
|
||||
win*)
|
||||
add_asflags -f win${bits}
|
||||
enabled debug && add_asflags -g cv8
|
||||
enabled debug && add_asflags -g dwarf2
|
||||
;;
|
||||
linux*|solaris*)
|
||||
add_asflags -f elf${bits}
|
||||
|
||||
11
configure
vendored
11
configure
vendored
@@ -41,7 +41,6 @@ Advanced options:
|
||||
${toggle_shared} shared library support
|
||||
${toggle_small} favor smaller size over speed
|
||||
${toggle_arm_asm_detok} assembly version of the detokenizer (ARM platforms only)
|
||||
${toggle_postproc_visualizer} macro block / block level visualizers
|
||||
|
||||
Codecs:
|
||||
Codecs can be selectively enabled or disabled individually, or by family:
|
||||
@@ -115,7 +114,6 @@ all_platforms="${all_platforms} x86-win32-vs7"
|
||||
all_platforms="${all_platforms} x86-win32-vs8"
|
||||
all_platforms="${all_platforms} x86-win32-vs9"
|
||||
all_platforms="${all_platforms} x86_64-darwin9-gcc"
|
||||
all_platforms="${all_platforms} x86_64-darwin10-gcc"
|
||||
all_platforms="${all_platforms} x86_64-linux-gcc"
|
||||
all_platforms="${all_platforms} x86_64-linux-icc"
|
||||
all_platforms="${all_platforms} x86_64-solaris-gcc"
|
||||
@@ -201,7 +199,6 @@ ARCH_EXT_LIST="
|
||||
sse2
|
||||
sse3
|
||||
ssse3
|
||||
sse4_1
|
||||
|
||||
altivec
|
||||
"
|
||||
@@ -252,7 +249,6 @@ CONFIG_LIST="
|
||||
shared
|
||||
small
|
||||
arm_asm_detok
|
||||
postproc_visualizer
|
||||
"
|
||||
CMDLINE_SELECT="
|
||||
extra_warnings
|
||||
@@ -292,7 +288,6 @@ CMDLINE_SELECT="
|
||||
shared
|
||||
small
|
||||
arm_asm_detok
|
||||
postproc_visualizer
|
||||
"
|
||||
|
||||
process_cmdline() {
|
||||
@@ -329,6 +324,8 @@ post_process_cmdline() {
|
||||
for c in ${CODECS}; do
|
||||
enabled ${c} && enable ${c##*_}s
|
||||
done
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -538,10 +535,6 @@ process_toolchain() {
|
||||
|
||||
# Other toolchain specific defaults
|
||||
case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac
|
||||
|
||||
if enabled postproc_visualizer; then
|
||||
enabled postproc || die "postproc_visualizer requires postproc to be enabled"
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ vpxdec.SRCS += md5_utils.c md5_utils.h
|
||||
vpxdec.SRCS += vpx_ports/vpx_timer.h
|
||||
vpxdec.SRCS += vpx/vpx_integer.h
|
||||
vpxdec.SRCS += args.c args.h vpx_ports/config.h
|
||||
vpxdec.SRCS += tools_common.c tools_common.h
|
||||
vpxdec.SRCS += nestegg/halloc/halloc.h
|
||||
vpxdec.SRCS += nestegg/halloc/src/align.h
|
||||
vpxdec.SRCS += nestegg/halloc/src/halloc.c
|
||||
@@ -29,13 +28,11 @@ vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950
|
||||
vpxdec.DESCRIPTION = Full featured decoder
|
||||
UTILS-$(CONFIG_ENCODERS) += vpxenc.c
|
||||
vpxenc.SRCS += args.c args.h y4minput.c y4minput.h
|
||||
vpxenc.SRCS += tools_common.c tools_common.h
|
||||
vpxenc.SRCS += vpx_ports/config.h vpx_ports/mem_ops.h
|
||||
vpxenc.SRCS += vpx_ports/mem_ops_aligned.h
|
||||
vpxenc.SRCS += libmkv/EbmlIDs.h
|
||||
vpxenc.SRCS += libmkv/EbmlWriter.c
|
||||
vpxenc.SRCS += libmkv/EbmlWriter.h
|
||||
vpxenc.SRCS += experimental.c
|
||||
vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
|
||||
vpxenc.DESCRIPTION = Full featured encoder
|
||||
|
||||
|
||||
@@ -78,8 +78,8 @@ if(frame_cnt + 1 == 22) {
|
||||
} else if(frame_cnt + 1 == 44) {
|
||||
vpx_active_map_t active;
|
||||
|
||||
active.rows = cfg.g_h/16;
|
||||
active.cols = cfg.g_w/16;
|
||||
active.rows = 240/16;
|
||||
active.cols = 320/16;
|
||||
|
||||
/* pass in null map to disable active_map*/
|
||||
active.active_map = NULL;
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
#define EXPERIMENTAL_C
|
||||
#include <stdio.h>
|
||||
|
||||
#include "args.h"
|
||||
|
||||
/* Get argument definitions */
|
||||
#include "experimental.h"
|
||||
|
||||
/* Build argument definition list */
|
||||
static const arg_def_t *xxx_def_list[] = {
|
||||
#include "experimental.h"
|
||||
NULL
|
||||
};
|
||||
|
||||
void xxx_show_usage(FILE *fp)
|
||||
{
|
||||
arg_show_usage(fp, xxx_def_list);
|
||||
}
|
||||
|
||||
int xxx_parse_arg(char **argi)
|
||||
{
|
||||
struct arg arg;
|
||||
|
||||
arg = arg_init(argi);
|
||||
if(0);
|
||||
#include "experimental.h"
|
||||
else return 0;
|
||||
return 1;
|
||||
}
|
||||
@@ -1,56 +0,0 @@
|
||||
#if defined(EXPERIMENTAL_C)
|
||||
/* The experimental.c file includes this file multiple times to build up the
|
||||
* required state.
|
||||
*/
|
||||
#if !defined(XXX_ARG_DEF)
|
||||
#define XXX_ARG_DEF(sym, value) \
|
||||
static const arg_def_t xxx_arg_def_##sym = \
|
||||
ARG_DEF(NULL, #sym, 1, "Experimental");
|
||||
|
||||
#define XXX_DEFINE_INT(sym, value) \
|
||||
XXX_ARG_DEF(sym, value); int xxx_##sym = value;
|
||||
#define XXX_DEFINE_UINT(sym, value) \
|
||||
XXX_ARG_DEF(sym, value); unsigned int xxx_##sym = value;
|
||||
|
||||
#elif !defined(XXX_ARG_DEF_LIST)
|
||||
#define XXX_ARG_DEF_LIST(sym) &xxx_arg_def_##sym,
|
||||
|
||||
#undef XXX_DEFINE_INT
|
||||
#define XXX_DEFINE_INT(sym, value) XXX_ARG_DEF_LIST(sym)
|
||||
|
||||
#undef XXX_DEFINE_UINT
|
||||
#define XXX_DEFINE_UINT(sym, value) XXX_ARG_DEF_LIST(sym)
|
||||
|
||||
#elif !defined(XXX_ARG_MATCH)
|
||||
#define XXX_ARG_MATCH
|
||||
|
||||
#undef XXX_DEFINE_INT
|
||||
#define XXX_DEFINE_INT(sym, value)\
|
||||
else if (arg_match(&arg, &xxx_arg_def_##sym, argi)) \
|
||||
xxx_##sym = arg_parse_int(&arg);
|
||||
|
||||
#undef XXX_DEFINE_UINT
|
||||
#define XXX_DEFINE_UINT(sym, value)\
|
||||
else if (arg_match(&arg, &xxx_arg_def_##sym, argi)) \
|
||||
xxx_##sym = arg_parse_uint(&arg);
|
||||
|
||||
#endif
|
||||
#else
|
||||
/* All other files just get the extern references to these symbols. */
|
||||
|
||||
#define XXX_DEFINE_INT(sym, value) extern int xxx_##sym;
|
||||
#define XXX_DEFINE_UINT(sym, value) extern unsigned int xxx_##sym;
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
void xxx_show_usage(FILE *fp);
|
||||
int xxx_parse_arg(char **argi);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* BEGIN EXPERIMENTS BELOW
|
||||
*
|
||||
* XXX_DEFINE_INT(knob, 0)
|
||||
*/
|
||||
XXX_DEFINE_INT(foo, 0)
|
||||
XXX_DEFINE_INT(bar, 0)
|
||||
@@ -1,24 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include "tools_common.h"
|
||||
#ifdef _WIN32
|
||||
#include <io.h>
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
|
||||
FILE* set_binary_mode(FILE *stream)
|
||||
{
|
||||
(void)stream;
|
||||
#ifdef _WIN32
|
||||
_setmode(_fileno(stream), _O_BINARY);
|
||||
#endif
|
||||
return stream;
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef TOOLS_COMMON_H
|
||||
#define TOOLS_COMMON_H
|
||||
|
||||
/* Sets a stdio stream into binary mode */
|
||||
FILE* set_binary_mode(FILE *stream);
|
||||
|
||||
#endif
|
||||
@@ -36,14 +36,6 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
|
||||
7, 11, 14, 15,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
|
||||
{
|
||||
1, 2, 6, 7,
|
||||
3, 5, 8, 13,
|
||||
4, 9, 12, 14,
|
||||
10, 11, 15, 16
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]);
|
||||
|
||||
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
|
||||
@@ -114,20 +106,23 @@ static void init_bit_trees()
|
||||
init_bit_tree(cat6, 11);
|
||||
}
|
||||
|
||||
|
||||
static vp8bc_index_t bcc1[1], bcc2[2], bcc3[3], bcc4[4], bcc5[5], bcc6[11];
|
||||
|
||||
vp8_extra_bit_struct vp8_extra_bits[12] =
|
||||
{
|
||||
{ 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, 1, 5},
|
||||
{ cat2, Pcat2, 2, 7},
|
||||
{ cat3, Pcat3, 3, 11},
|
||||
{ cat4, Pcat4, 4, 19},
|
||||
{ cat5, Pcat5, 5, 35},
|
||||
{ cat6, Pcat6, 11, 67},
|
||||
{ 0, 0, 0, 0}
|
||||
{ 0, 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, bcc1, 1, 5},
|
||||
{ cat2, Pcat2, bcc2, 2, 7},
|
||||
{ cat3, Pcat3, bcc3, 3, 11},
|
||||
{ cat4, Pcat4, bcc4, 4, 19},
|
||||
{ cat5, Pcat5, bcc5, 5, 35},
|
||||
{ cat6, Pcat6, bcc6, 11, 67},
|
||||
{ 0, 0, 0, 0, 0}
|
||||
};
|
||||
#include "defaultcoefcounts.h"
|
||||
|
||||
|
||||
@@ -24,10 +24,10 @@
|
||||
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
|
||||
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
|
||||
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-26 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 11-26 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 27-58 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 59+ Extra Bits 11+1 */
|
||||
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
|
||||
|
||||
#define vp8_coef_tokens 12
|
||||
@@ -42,6 +42,7 @@ typedef struct
|
||||
{
|
||||
vp8_tree_p tree;
|
||||
const vp8_prob *prob;
|
||||
vp8bc_index_t *prob_bc;
|
||||
int Len;
|
||||
int base_val;
|
||||
} vp8_extra_bit_struct;
|
||||
@@ -94,7 +95,6 @@ struct VP8Common;
|
||||
void vp8_default_coef_probs(struct VP8Common *);
|
||||
|
||||
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
|
||||
extern short vp8_default_zig_zag_mask[16];
|
||||
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
|
||||
|
||||
|
||||
@@ -65,13 +65,11 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
|
||||
|
||||
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
|
||||
rtcd->postproc.down = vp8_mbpost_proc_down_c;
|
||||
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
|
||||
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
|
||||
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
|
||||
rtcd->postproc.blend_mb_inner = vp8_blend_mb_inner_c;
|
||||
rtcd->postproc.blend_mb_outer = vp8_blend_mb_outer_c;
|
||||
rtcd->postproc.blend_b = vp8_blend_b_c;
|
||||
rtcd->postproc.down = vp8_mbpost_proc_down_c;
|
||||
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
|
||||
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
|
||||
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
|
||||
rtcd->postproc.blend_mb = vp8_blend_mb_c;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -18,7 +18,6 @@ extern "C"
|
||||
#endif
|
||||
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "vpx/vp8cx.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "type_aliases.h"
|
||||
#include "ppflags.h"
|
||||
@@ -190,8 +189,6 @@ extern "C"
|
||||
|
||||
struct vpx_fixed_buf two_pass_stats_in;
|
||||
struct vpx_codec_pkt_list *output_pkt_list;
|
||||
|
||||
vp8e_tuning tuning;
|
||||
} VP8_CONFIG;
|
||||
|
||||
|
||||
@@ -207,7 +204,7 @@ extern "C"
|
||||
// and not just a copy of the pointer..
|
||||
int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time_stamp);
|
||||
int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush);
|
||||
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
|
||||
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags);
|
||||
|
||||
int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags);
|
||||
int vp8_update_reference(VP8_PTR comp, int ref_frame_flags);
|
||||
|
||||
@@ -105,7 +105,7 @@ typedef struct VP8Common
|
||||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||
|
||||
FRAME_TYPE last_frame_type; /* Save last frame's frame type for loopfilter init checking and motion search. */
|
||||
FRAME_TYPE last_frame_type; /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int show_frame;
|
||||
|
||||
@@ -51,7 +51,7 @@ extern "C"
|
||||
int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst);
|
||||
|
||||
int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, INT64 time_stamp);
|
||||
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags);
|
||||
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags);
|
||||
|
||||
int vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
int vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
|
||||
|
||||
/* global constants */
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
|
||||
static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
|
||||
{
|
||||
{ RGB_TO_YUV(0x98FB98) }, /* PaleGreen */
|
||||
@@ -41,32 +41,13 @@ static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
|
||||
{ RGB_TO_YUV(0xFF0000) } /* Red */
|
||||
};
|
||||
|
||||
static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] =
|
||||
{
|
||||
{ RGB_TO_YUV(0x6633ff) }, /* Purple */
|
||||
{ RGB_TO_YUV(0xcc33ff) }, /* Magenta */
|
||||
{ RGB_TO_YUV(0xff33cc) }, /* Pink */
|
||||
{ RGB_TO_YUV(0xff3366) }, /* Coral */
|
||||
{ RGB_TO_YUV(0x3366ff) }, /* Blue */
|
||||
{ RGB_TO_YUV(0xed00f5) }, /* Dark Blue */
|
||||
{ RGB_TO_YUV(0x2e00b8) }, /* Dark Purple */
|
||||
{ RGB_TO_YUV(0xff6633) }, /* Orange */
|
||||
{ RGB_TO_YUV(0x33ccff) }, /* Light Blue */
|
||||
{ RGB_TO_YUV(0x8ab800) }, /* Green */
|
||||
{ RGB_TO_YUV(0xffcc33) }, /* Light Orange */
|
||||
{ RGB_TO_YUV(0x33ffcc) }, /* Aqua */
|
||||
{ RGB_TO_YUV(0x66ff33) }, /* Light Green */
|
||||
{ RGB_TO_YUV(0xccff33) }, /* Yellow */
|
||||
};
|
||||
|
||||
static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] =
|
||||
static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
|
||||
{
|
||||
{ RGB_TO_YUV(0x00ff00) }, /* Blue */
|
||||
{ RGB_TO_YUV(0x0000ff) }, /* Green */
|
||||
{ RGB_TO_YUV(0xffff00) }, /* Yellow */
|
||||
{ RGB_TO_YUV(0xff0000) }, /* Red */
|
||||
};
|
||||
#endif
|
||||
|
||||
static const short kernel5[] =
|
||||
{
|
||||
@@ -495,7 +476,7 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
|
||||
* edges unblended to give distinction to macro blocks in areas
|
||||
* filled with the same color block.
|
||||
*/
|
||||
void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
{
|
||||
int i, j;
|
||||
@@ -503,10 +484,10 @@ void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int u1_const = u1*((1<<16)-alpha);
|
||||
int v1_const = v1*((1<<16)-alpha);
|
||||
|
||||
y += 2*stride + 2;
|
||||
for (i = 0; i < 12; i++)
|
||||
y += stride + 2;
|
||||
for (i = 0; i < 14; i++)
|
||||
{
|
||||
for (j = 0; j < 12; j++)
|
||||
for (j = 0; j < 14; j++)
|
||||
{
|
||||
y[j] = (y[j]*alpha + y1_const)>>16;
|
||||
}
|
||||
@@ -530,104 +511,6 @@ void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
}
|
||||
}
|
||||
|
||||
/* Blend only the edge of the macro block. Leave center
|
||||
* unblended to allow for other visualizations to be layered.
|
||||
*/
|
||||
void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
{
|
||||
int i, j;
|
||||
int y1_const = y1*((1<<16)-alpha);
|
||||
int u1_const = u1*((1<<16)-alpha);
|
||||
int v1_const = v1*((1<<16)-alpha);
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
y[j] = (y[j]*alpha + y1_const)>>16;
|
||||
}
|
||||
y += stride;
|
||||
}
|
||||
|
||||
for (i = 0; i < 12; i++)
|
||||
{
|
||||
y[0] = (y[0]*alpha + y1_const)>>16;
|
||||
y[1] = (y[1]*alpha + y1_const)>>16;
|
||||
y[14] = (y[14]*alpha + y1_const)>>16;
|
||||
y[15] = (y[15]*alpha + y1_const)>>16;
|
||||
y += stride;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
y[j] = (y[j]*alpha + y1_const)>>16;
|
||||
}
|
||||
y += stride;
|
||||
}
|
||||
|
||||
stride >>= 1;
|
||||
|
||||
for (j = 0; j < 8; j++)
|
||||
{
|
||||
u[j] = (u[j]*alpha + u1_const)>>16;
|
||||
v[j] = (v[j]*alpha + v1_const)>>16;
|
||||
}
|
||||
u += stride;
|
||||
v += stride;
|
||||
|
||||
for (i = 0; i < 6; i++)
|
||||
{
|
||||
u[0] = (u[0]*alpha + u1_const)>>16;
|
||||
v[0] = (v[0]*alpha + v1_const)>>16;
|
||||
|
||||
u[7] = (u[7]*alpha + u1_const)>>16;
|
||||
v[7] = (v[7]*alpha + v1_const)>>16;
|
||||
|
||||
u += stride;
|
||||
v += stride;
|
||||
}
|
||||
|
||||
for (j = 0; j < 8; j++)
|
||||
{
|
||||
u[j] = (u[j]*alpha + u1_const)>>16;
|
||||
v[j] = (v[j]*alpha + v1_const)>>16;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
{
|
||||
int i, j;
|
||||
int y1_const = y1*((1<<16)-alpha);
|
||||
int u1_const = u1*((1<<16)-alpha);
|
||||
int v1_const = v1*((1<<16)-alpha);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
y[j] = (y[j]*alpha + y1_const)>>16;
|
||||
}
|
||||
y += stride;
|
||||
}
|
||||
|
||||
stride >>= 1;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
u[j] = (u[j]*alpha + u1_const)>>16;
|
||||
v[j] = (v[j]*alpha + v1_const)>>16;
|
||||
}
|
||||
u += stride;
|
||||
v += stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
|
||||
{
|
||||
int dx;
|
||||
@@ -639,7 +522,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
||||
dy = *y1 - y0;
|
||||
|
||||
*x1 = width;
|
||||
if (dx)
|
||||
if (dy)
|
||||
*y1 = ((width-x0)*dy)/dx + y0;
|
||||
}
|
||||
if (*x1 < 0)
|
||||
@@ -648,7 +531,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
||||
dy = *y1 - y0;
|
||||
|
||||
*x1 = 0;
|
||||
if (dx)
|
||||
if (dy)
|
||||
*y1 = ((0-x0)*dy)/dx + y0;
|
||||
}
|
||||
if (*y1 > height)
|
||||
@@ -657,7 +540,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
||||
dy = *y1 - y0;
|
||||
|
||||
*y1 = height;
|
||||
if (dy)
|
||||
if (dx)
|
||||
*x1 = ((height-y0)*dx)/dy + x0;
|
||||
}
|
||||
if (*y1 < 0)
|
||||
@@ -666,7 +549,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
||||
dy = *y1 - y0;
|
||||
|
||||
*y1 = 0;
|
||||
if (dy)
|
||||
if (dx)
|
||||
*x1 = ((0-y0)*dx)/dy + x0;
|
||||
}
|
||||
}
|
||||
@@ -678,13 +561,10 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
||||
#define RTCD_VTABLE(oci) NULL
|
||||
#endif
|
||||
|
||||
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags)
|
||||
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
|
||||
{
|
||||
char message[512];
|
||||
int q = oci->filter_level * 10 / 6;
|
||||
int flags = ppflags->post_proc_flag;
|
||||
int deblock_level = ppflags->deblocking_level;
|
||||
int noise_level = ppflags->noise_level;
|
||||
|
||||
if (!oci->frame_to_show)
|
||||
return -1;
|
||||
@@ -741,8 +621,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
oci->post_proc_buffer.y_stride);
|
||||
}
|
||||
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
if (flags & VP8D_DEBUG_TXT_FRAME_INFO)
|
||||
if (flags & VP8D_DEBUG_LEVEL1)
|
||||
{
|
||||
sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
|
||||
(oci->frame_type == KEY_FRAME),
|
||||
@@ -754,7 +633,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
|
||||
}
|
||||
|
||||
if (flags & VP8D_DEBUG_TXT_MBLK_MODES)
|
||||
if (flags & VP8D_DEBUG_LEVEL2)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char *y_ptr;
|
||||
@@ -786,7 +665,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & VP8D_DEBUG_TXT_DC_DIFF)
|
||||
if (flags & VP8D_DEBUG_LEVEL3)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char *y_ptr;
|
||||
@@ -821,14 +700,45 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & VP8D_DEBUG_TXT_RATE_INFO)
|
||||
if (flags & VP8D_DEBUG_LEVEL4)
|
||||
{
|
||||
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
|
||||
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
|
||||
#if 0
|
||||
int i, j;
|
||||
unsigned char *y_ptr;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int mb_rows = post->y_height >> 4;
|
||||
int mb_cols = post->y_width >> 4;
|
||||
int mb_index = 0;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
|
||||
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
|
||||
|
||||
/* vp8_filter each macro block */
|
||||
for (i = 0; i < mb_rows; i++)
|
||||
{
|
||||
for (j = 0; j < mb_cols; j++)
|
||||
{
|
||||
char zz[4];
|
||||
|
||||
sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0');
|
||||
vp8_blit_text(zz, y_ptr, post->y_stride);
|
||||
mb_index ++;
|
||||
y_ptr += 16;
|
||||
}
|
||||
|
||||
mb_index ++; /* border */
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/* Draw motion vectors */
|
||||
if ((flags & VP8D_DEBUG_DRAW_MV) && ppflags->display_mv_flag)
|
||||
if (flags & VP8D_DEBUG_LEVEL5)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
@@ -839,144 +749,29 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
MODE_INFO *mi = oci->mi;
|
||||
int x0, y0;
|
||||
|
||||
for (y0 = 0; y0 < height; y0 += 16)
|
||||
for (y0 = 8; y0 < (height + 8); y0 += 16)
|
||||
{
|
||||
for (x0 = 0; x0 < width; x0 += 16)
|
||||
for (x0 = 8; x0 < (width + 8); x0 += 16)
|
||||
{
|
||||
int x1, y1;
|
||||
|
||||
if (!(ppflags->display_mv_flag & (1<<mi->mbmi.mode)))
|
||||
{
|
||||
mi++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (mi->mbmi.mode == SPLITMV)
|
||||
{
|
||||
switch (mi->mbmi.partitioning)
|
||||
{
|
||||
case 0 : /* mv_top_bottom */
|
||||
{
|
||||
B_MODE_INFO *bmi = &mi->bmi[0];
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = x0 + 8 + (mv->col >> 3);
|
||||
y1 = y0 + 4 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+8, &x1, y0+4, &y1, width, height);
|
||||
vp8_blit_line (x0+8, x1, y0+4, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[8];
|
||||
|
||||
x1 = x0 + 8 + (mv->col >> 3);
|
||||
y1 = y0 +12 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+8, &x1, y0+12, &y1, width, height);
|
||||
vp8_blit_line (x0+8, x1, y0+12, y1, y_buffer, y_stride);
|
||||
|
||||
break;
|
||||
}
|
||||
case 1 : /* mv_left_right */
|
||||
{
|
||||
B_MODE_INFO *bmi = &mi->bmi[0];
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = x0 + 4 + (mv->col >> 3);
|
||||
y1 = y0 + 8 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+4, &x1, y0+8, &y1, width, height);
|
||||
vp8_blit_line (x0+4, x1, y0+8, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[2];
|
||||
|
||||
x1 = x0 +12 + (mv->col >> 3);
|
||||
y1 = y0 + 8 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+12, &x1, y0+8, &y1, width, height);
|
||||
vp8_blit_line (x0+12, x1, y0+8, y1, y_buffer, y_stride);
|
||||
|
||||
break;
|
||||
}
|
||||
case 2 : /* mv_quarters */
|
||||
{
|
||||
B_MODE_INFO *bmi = &mi->bmi[0];
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = x0 + 4 + (mv->col >> 3);
|
||||
y1 = y0 + 4 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+4, &x1, y0+4, &y1, width, height);
|
||||
vp8_blit_line (x0+4, x1, y0+4, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[2];
|
||||
|
||||
x1 = x0 +12 + (mv->col >> 3);
|
||||
y1 = y0 + 4 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+12, &x1, y0+4, &y1, width, height);
|
||||
vp8_blit_line (x0+12, x1, y0+4, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[8];
|
||||
|
||||
x1 = x0 + 4 + (mv->col >> 3);
|
||||
y1 = y0 +12 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+4, &x1, y0+12, &y1, width, height);
|
||||
vp8_blit_line (x0+4, x1, y0+12, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[10];
|
||||
|
||||
x1 = x0 +12 + (mv->col >> 3);
|
||||
y1 = y0 +12 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+12, &x1, y0+12, &y1, width, height);
|
||||
vp8_blit_line (x0+12, x1, y0+12, y1, y_buffer, y_stride);
|
||||
break;
|
||||
}
|
||||
default :
|
||||
{
|
||||
B_MODE_INFO *bmi = mi->bmi;
|
||||
int bx0, by0;
|
||||
|
||||
for (by0 = y0; by0 < (y0+16); by0 += 4)
|
||||
{
|
||||
for (bx0 = x0; bx0 < (x0+16); bx0 += 4)
|
||||
{
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = bx0 + 2 + (mv->col >> 3);
|
||||
y1 = by0 + 2 + (mv->row >> 3);
|
||||
|
||||
constrain_line (bx0+2, &x1, by0+2, &y1, width, height);
|
||||
vp8_blit_line (bx0+2, x1, by0+2, y1, y_buffer, y_stride);
|
||||
|
||||
bmi++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (mi->mbmi.mode >= NEARESTMV)
|
||||
int x1, y1;
|
||||
if (mi->mbmi.mode >= NEARESTMV)
|
||||
{
|
||||
MV *mv = &mi->mbmi.mv.as_mv;
|
||||
const int lx0 = x0 + 8;
|
||||
const int ly0 = y0 + 8;
|
||||
|
||||
x1 = lx0 + (mv->col >> 3);
|
||||
y1 = ly0 + (mv->row >> 3);
|
||||
x1 = x0 + (mv->col >> 3);
|
||||
y1 = y0 + (mv->row >> 3);
|
||||
|
||||
if (x1 != lx0 && y1 != ly0)
|
||||
if (x1 != x0 && y1 != y0)
|
||||
{
|
||||
constrain_line (lx0, &x1, ly0-1, &y1, width, height);
|
||||
vp8_blit_line (lx0, x1, ly0-1, y1, y_buffer, y_stride);
|
||||
constrain_line (x0, &x1, y0-1, &y1, width, height);
|
||||
vp8_blit_line (x0, x1, y0-1, y1, y_buffer, y_stride);
|
||||
|
||||
constrain_line (lx0, &x1, ly0+1, &y1, width, height);
|
||||
vp8_blit_line (lx0, x1, ly0+1, y1, y_buffer, y_stride);
|
||||
constrain_line (x0, &x1, y0+1, &y1, width, height);
|
||||
vp8_blit_line (x0, x1, y0+1, y1, y_buffer, y_stride);
|
||||
}
|
||||
else
|
||||
vp8_blit_line (lx0, x1, ly0, y1, y_buffer, y_stride);
|
||||
vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride);
|
||||
}
|
||||
|
||||
mi++;
|
||||
}
|
||||
mi++;
|
||||
@@ -984,10 +779,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
}
|
||||
|
||||
/* Color in block modes */
|
||||
if ((flags & VP8D_DEBUG_CLR_BLK_MODES)
|
||||
&& (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag))
|
||||
if (flags & VP8D_DEBUG_LEVEL6)
|
||||
{
|
||||
int y, x;
|
||||
int i, j;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
@@ -997,54 +791,18 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
|
||||
for (y = 0; y < height; y += 16)
|
||||
for (i = 0; i < height; i += 16)
|
||||
{
|
||||
for (x = 0; x < width; x += 16)
|
||||
for (j = 0; j < width; j += 16)
|
||||
{
|
||||
int Y = 0, U = 0, V = 0;
|
||||
|
||||
if (mi->mbmi.mode == B_PRED &&
|
||||
((ppflags->display_mb_modes_flag & B_PRED) || ppflags->display_b_modes_flag))
|
||||
{
|
||||
int by, bx;
|
||||
unsigned char *yl, *ul, *vl;
|
||||
B_MODE_INFO *bmi = mi->bmi;
|
||||
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
|
||||
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
|
||||
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
|
||||
|
||||
yl = y_ptr + x;
|
||||
ul = u_ptr + (x>>1);
|
||||
vl = v_ptr + (x>>1);
|
||||
|
||||
for (by = 0; by < 16; by += 4)
|
||||
{
|
||||
for (bx = 0; bx < 16; bx += 4)
|
||||
{
|
||||
if ((ppflags->display_b_modes_flag & (1<<mi->mbmi.mode))
|
||||
|| (ppflags->display_mb_modes_flag & B_PRED))
|
||||
{
|
||||
Y = B_PREDICTION_MODE_colors[bmi->mode][0];
|
||||
U = B_PREDICTION_MODE_colors[bmi->mode][1];
|
||||
V = B_PREDICTION_MODE_colors[bmi->mode][2];
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b)
|
||||
(yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride);
|
||||
}
|
||||
bmi++;
|
||||
}
|
||||
|
||||
yl += y_stride*4;
|
||||
ul += y_stride*1;
|
||||
vl += y_stride*1;
|
||||
}
|
||||
}
|
||||
else if (ppflags->display_mb_modes_flag & (1<<mi->mbmi.mode))
|
||||
{
|
||||
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
|
||||
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
|
||||
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_inner)
|
||||
(y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
|
||||
}
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
|
||||
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
|
||||
|
||||
mi++;
|
||||
}
|
||||
@@ -1057,9 +815,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
}
|
||||
|
||||
/* Color in frame reference blocks */
|
||||
if ((flags & VP8D_DEBUG_CLR_FRM_REF_BLKS) && ppflags->display_ref_frame_flag)
|
||||
if (flags & VP8D_DEBUG_LEVEL7)
|
||||
{
|
||||
int y, x;
|
||||
int i, j;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
@@ -1069,21 +827,18 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
|
||||
for (y = 0; y < height; y += 16)
|
||||
for (i = 0; i < height; i += 16)
|
||||
{
|
||||
for (x = 0; x < width; x +=16)
|
||||
for (j = 0; j < width; j +=16)
|
||||
{
|
||||
int Y = 0, U = 0, V = 0;
|
||||
|
||||
if (ppflags->display_ref_frame_flag & (1<<mi->mbmi.ref_frame))
|
||||
{
|
||||
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
|
||||
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
|
||||
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
|
||||
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
|
||||
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
|
||||
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer)
|
||||
(y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
|
||||
}
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
|
||||
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
|
||||
|
||||
mi++;
|
||||
}
|
||||
@@ -1094,7 +849,6 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
mi++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*dest = oci->post_proc_buffer;
|
||||
|
||||
|
||||
@@ -24,15 +24,7 @@
|
||||
char whiteclamp[16], char bothclamp[16],\
|
||||
unsigned int w, unsigned int h, int pitch)
|
||||
|
||||
#define prototype_postproc_blend_mb_inner(sym)\
|
||||
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
|
||||
#define prototype_postproc_blend_mb_outer(sym)\
|
||||
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
|
||||
#define prototype_postproc_blend_b(sym)\
|
||||
#define prototype_postproc_blend_mb(sym)\
|
||||
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
|
||||
@@ -60,36 +52,22 @@ extern prototype_postproc(vp8_postproc_downacross);
|
||||
#endif
|
||||
extern prototype_postproc_addnoise(vp8_postproc_addnoise);
|
||||
|
||||
#ifndef vp8_postproc_blend_mb_inner
|
||||
#define vp8_postproc_blend_mb_inner vp8_blend_mb_inner_c
|
||||
#ifndef vp8_postproc_blend_mb
|
||||
#define vp8_postproc_blend_mb vp8_blend_mb_c
|
||||
#endif
|
||||
extern prototype_postproc_blend_mb_inner(vp8_postproc_blend_mb_inner);
|
||||
|
||||
#ifndef vp8_postproc_blend_mb_outer
|
||||
#define vp8_postproc_blend_mb_outer vp8_blend_mb_outer_c
|
||||
#endif
|
||||
extern prototype_postproc_blend_mb_outer(vp8_postproc_blend_mb_outer);
|
||||
|
||||
#ifndef vp8_postproc_blend_b
|
||||
#define vp8_postproc_blend_b vp8_blend_b_c
|
||||
#endif
|
||||
extern prototype_postproc_blend_b(vp8_postproc_blend_b);
|
||||
extern prototype_postproc_blend_mb(vp8_postproc_blend_mb);
|
||||
|
||||
typedef prototype_postproc((*vp8_postproc_fn_t));
|
||||
typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
|
||||
typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
|
||||
typedef prototype_postproc_blend_mb_inner((*vp8_postproc_blend_mb_inner_fn_t));
|
||||
typedef prototype_postproc_blend_mb_outer((*vp8_postproc_blend_mb_outer_fn_t));
|
||||
typedef prototype_postproc_blend_b((*vp8_postproc_blend_b_fn_t));
|
||||
typedef prototype_postproc_blend_mb((*vp8_postproc_blend_mb_fn_t));
|
||||
typedef struct
|
||||
{
|
||||
vp8_postproc_inplace_fn_t down;
|
||||
vp8_postproc_inplace_fn_t across;
|
||||
vp8_postproc_fn_t downacross;
|
||||
vp8_postproc_addnoise_fn_t addnoise;
|
||||
vp8_postproc_blend_mb_inner_fn_t blend_mb_inner;
|
||||
vp8_postproc_blend_mb_outer_fn_t blend_mb_outer;
|
||||
vp8_postproc_blend_b_fn_t blend_b;
|
||||
vp8_postproc_inplace_fn_t down;
|
||||
vp8_postproc_inplace_fn_t across;
|
||||
vp8_postproc_fn_t downacross;
|
||||
vp8_postproc_addnoise_fn_t addnoise;
|
||||
vp8_postproc_blend_mb_fn_t blend_mb;
|
||||
} vp8_postproc_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
@@ -111,7 +89,7 @@ struct postproc_state
|
||||
#include "onyxc_int.h"
|
||||
#include "ppflags.h"
|
||||
int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
|
||||
vp8_ppflags_t *flags);
|
||||
int deblock_level, int noise_level, int flags);
|
||||
|
||||
|
||||
void vp8_de_noise(YV12_BUFFER_CONFIG *source,
|
||||
|
||||
@@ -13,28 +13,17 @@
|
||||
#define __INC_PPFLAGS_H
|
||||
enum
|
||||
{
|
||||
VP8D_NOFILTERING = 0,
|
||||
VP8D_DEBLOCK = 1<<0,
|
||||
VP8D_DEMACROBLOCK = 1<<1,
|
||||
VP8D_ADDNOISE = 1<<2,
|
||||
VP8D_DEBUG_TXT_FRAME_INFO = 1<<3,
|
||||
VP8D_DEBUG_TXT_MBLK_MODES = 1<<4,
|
||||
VP8D_DEBUG_TXT_DC_DIFF = 1<<5,
|
||||
VP8D_DEBUG_TXT_RATE_INFO = 1<<6,
|
||||
VP8D_DEBUG_DRAW_MV = 1<<7,
|
||||
VP8D_DEBUG_CLR_BLK_MODES = 1<<8,
|
||||
VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9
|
||||
VP8D_NOFILTERING = 0,
|
||||
VP8D_DEBLOCK = 1<<0,
|
||||
VP8D_DEMACROBLOCK = 1<<1,
|
||||
VP8D_ADDNOISE = 1<<2,
|
||||
VP8D_DEBUG_LEVEL1 = 1<<3,
|
||||
VP8D_DEBUG_LEVEL2 = 1<<4,
|
||||
VP8D_DEBUG_LEVEL3 = 1<<5,
|
||||
VP8D_DEBUG_LEVEL4 = 1<<6,
|
||||
VP8D_DEBUG_LEVEL5 = 1<<7,
|
||||
VP8D_DEBUG_LEVEL6 = 1<<8,
|
||||
VP8D_DEBUG_LEVEL7 = 1<<9
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int post_proc_flag;
|
||||
int deblocking_level;
|
||||
int noise_level;
|
||||
int display_ref_frame_flag;
|
||||
int display_mb_modes_flag;
|
||||
int display_b_modes_flag;
|
||||
int display_mv_flag;
|
||||
} vp8_ppflags_t;
|
||||
|
||||
#endif
|
||||
|
||||
46
vp8/common/preproc.h
Normal file
46
vp8/common/preproc.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Module Title : preproc.h
|
||||
*
|
||||
* Description : simple preprocessor
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef __INC_PREPROC_H
|
||||
#define __INC_PREPROC_H
|
||||
|
||||
/****************************************************************************
|
||||
* Types
|
||||
****************************************************************************/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned char *frame_buffer;
|
||||
int frame;
|
||||
unsigned int *fixed_divide;
|
||||
|
||||
unsigned char *frame_buffer_alloc;
|
||||
unsigned int *fixed_divide_alloc;
|
||||
} pre_proc_instance;
|
||||
|
||||
/****************************************************************************
|
||||
* Functions.
|
||||
****************************************************************************/
|
||||
void pre_proc_machine_specific_config(void);
|
||||
void delete_pre_proc(pre_proc_instance *ppi);
|
||||
int init_pre_proc(pre_proc_instance *ppi, int frame_size);
|
||||
extern void spatial_filter_c(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int width, int height, int pitch, int strength);
|
||||
extern void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
|
||||
|
||||
#endif
|
||||
76
vp8/common/preprocif.h
Normal file
76
vp8/common/preprocif.h
Normal file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Module Title : preproc_if.h
|
||||
*
|
||||
* Description : Pre-processor interface header file.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef __PREPROC_IF_H
|
||||
#define __PREPROC_IF_H
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
****************************************************************************/
|
||||
#include "type_aliases.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Types
|
||||
****************************************************************************/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UINT8 *Yuv0ptr;
|
||||
UINT8 *Yuv1ptr;
|
||||
|
||||
UINT8 *frag_info; // blocks coded : passed in
|
||||
UINT32 frag_info_element_size; // size of each element
|
||||
UINT32 frag_info_coded_mask; // mask to get at whether fragment is coded
|
||||
|
||||
UINT32 *region_index; // Gives pixel index for top left of each block
|
||||
UINT32 video_frame_height;
|
||||
UINT32 video_frame_width;
|
||||
UINT8 hfrag_pixels;
|
||||
UINT8 vfrag_pixels;
|
||||
|
||||
} SCAN_CONFIG_DATA;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SCP_FILTER_ON_OFF,
|
||||
SCP_SET_SRF_OFFSET,
|
||||
SCP_SET_EBO_ON_OFF,
|
||||
SCP_SET_VCAP_LEVEL_OFFSET,
|
||||
SCP_SET_SHOW_LOCAL
|
||||
|
||||
} SCP_SETTINGS;
|
||||
|
||||
typedef struct PP_INSTANCE *x_pp_inst;
|
||||
|
||||
/****************************************************************************
|
||||
* Module statics
|
||||
****************************************************************************/
|
||||
/* Controls whether Early break out is on or off in default case */
|
||||
#define EARLY_BREAKOUT_DEFAULT TRUE
|
||||
|
||||
/****************************************************************************
|
||||
* Functions
|
||||
****************************************************************************/
|
||||
extern void set_scan_param(x_pp_inst ppi, UINT32 param_id, INT32 param_value);
|
||||
extern UINT32 yuvanalyse_frame(x_pp_inst ppi, UINT32 *KFIndicator);
|
||||
extern x_pp_inst create_pp_instance(void);
|
||||
extern void delete_pp_instance(x_pp_inst *);
|
||||
extern BOOL scan_yuvinit(x_pp_inst, SCAN_CONFIG_DATA *scan_config_ptr);
|
||||
|
||||
#endif
|
||||
@@ -19,7 +19,7 @@
|
||||
extern void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd);
|
||||
extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
|
||||
extern void vp8_decoder_create_threads(VP8D_COMP *pbi);
|
||||
extern void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
|
||||
extern int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
|
||||
extern void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -506,7 +506,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
|
||||
pbi->common.error.setjmp = 0;
|
||||
return retcode;
|
||||
}
|
||||
int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags)
|
||||
int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags)
|
||||
{
|
||||
int ret = -1;
|
||||
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
|
||||
@@ -524,7 +524,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
|
||||
|
||||
sd->clrtype = pbi->common.clr_type;
|
||||
#if CONFIG_POSTPROC
|
||||
ret = vp8_post_proc_frame(&pbi->common, sd, flags);
|
||||
ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags);
|
||||
#else
|
||||
|
||||
if (pbi->common.frame_to_show)
|
||||
|
||||
@@ -596,7 +596,7 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
|
||||
}
|
||||
|
||||
|
||||
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
|
||||
int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
VP8_COMMON *const pc = & pbi->common;
|
||||
@@ -647,6 +647,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
|
||||
for (i=0; i< pc->mb_rows; i++)
|
||||
CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) width;
|
||||
|
||||
@@ -29,9 +29,10 @@
|
||||
push {r4-r11, lr}
|
||||
|
||||
; Add size of xcount * sizeof (TOKENEXTRA) to get stop
|
||||
; sizeof (TOKENEXTRA) is 8
|
||||
; sizeof (TOKENEXTRA) is 20
|
||||
add r2, r2, r2, lsl #2 ; xcount
|
||||
sub sp, sp, #12
|
||||
add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA)
|
||||
add r2, r1, r2, lsl #2 ; stop = p + xcount
|
||||
str r2, [sp, #0]
|
||||
str r3, [sp, #8] ; save vp8_coef_encodings
|
||||
ldr r2, [r0, #vp8_writer_lowvalue]
|
||||
@@ -40,13 +41,13 @@
|
||||
b check_p_lt_stop
|
||||
|
||||
while_p_lt_stop
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #8] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
ldr r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
@@ -141,11 +142,12 @@ token_count_lt_zero
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #48] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here vp8_extra_bit_struct == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
; element. Here vp8_extra_bit_struct == 20
|
||||
add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
@@ -153,7 +155,7 @@ token_count_lt_zero
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
ldr lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
|
||||
@@ -62,13 +62,13 @@ mb_row_loop
|
||||
; actual work gets done here!
|
||||
|
||||
while_p_lt_stop
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #20] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
ldr r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
@@ -163,11 +163,12 @@ token_count_lt_zero
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #8] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here vp8_extra_bit_struct == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
; element. Here vp8_extra_bit_struct == 20
|
||||
add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
@@ -175,7 +176,7 @@ token_count_lt_zero
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
ldr lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
|
||||
@@ -90,13 +90,13 @@ mb_row_loop
|
||||
; actual work gets done here!
|
||||
|
||||
while_p_lt_stop
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #80] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
ldr r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
@@ -191,11 +191,12 @@ token_count_lt_zero
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #84] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here vp8_extra_bit_struct == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
; element. Here vp8_extra_bit_struct == 20
|
||||
add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
@@ -203,7 +204,7 @@ token_count_lt_zero
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
ldr lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ extern int vp8_fast_quantize_b_neon_func(short *coeff_ptr, short *zbin_ptr, shor
|
||||
|
||||
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant_fast);
|
||||
d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -51,6 +51,7 @@ DEFINE(vp8_token_len, offsetof(vp8_token, Len));
|
||||
|
||||
DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree));
|
||||
DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob));
|
||||
DEFINE(vp8_extra_bit_struct_prob_bc, offsetof(vp8_extra_bit_struct, prob_bc));
|
||||
DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len));
|
||||
DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val));
|
||||
|
||||
@@ -66,8 +67,8 @@ DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
|
||||
|
||||
// These two sizes are used in vp7cx_pack_tokens. They are hard coded
|
||||
// so if the size changes this will have to be adjusted.
|
||||
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)
|
||||
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)
|
||||
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 20)
|
||||
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 20)
|
||||
|
||||
//add asserts for any offset that is not supported by assembly code
|
||||
//add asserts for any size that is not supported by assembly code
|
||||
|
||||
@@ -33,7 +33,6 @@ typedef struct
|
||||
|
||||
// 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
|
||||
short *quant;
|
||||
short *quant_fast;
|
||||
short *quant_shift;
|
||||
short *zbin;
|
||||
short *zrun_zbin_boost;
|
||||
@@ -82,7 +81,6 @@ typedef struct
|
||||
int errthresh;
|
||||
int rddiv;
|
||||
int rdmult;
|
||||
INT64 activity_sum;
|
||||
|
||||
int mvcosts[2][MVvals+1];
|
||||
int *mvcost[2];
|
||||
|
||||
@@ -62,6 +62,7 @@ unsigned int b_modes[14] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
static const int qrounding_factors[129] =
|
||||
{
|
||||
56, 56, 56, 56, 48, 48, 56, 56,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
@@ -77,18 +78,12 @@ static const int qrounding_factors[129] =
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48
|
||||
48,
|
||||
};
|
||||
|
||||
static const int qzbin_factors[129] =
|
||||
{
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
72, 72, 72, 72, 80, 80, 72, 72,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
@@ -99,11 +94,17 @@ static const int qzbin_factors[129] =
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80,
|
||||
};
|
||||
|
||||
static const int qrounding_factors_y2[129] =
|
||||
{
|
||||
56, 56, 56, 56, 48, 48, 56, 56,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
@@ -119,18 +120,12 @@ static const int qrounding_factors_y2[129] =
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48
|
||||
48,
|
||||
};
|
||||
|
||||
static const int qzbin_factors_y2[129] =
|
||||
{
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
72, 72, 72, 72, 80, 80, 72, 72,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
@@ -141,30 +136,26 @@ static const int qzbin_factors_y2[129] =
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80,
|
||||
};
|
||||
|
||||
#define EXACT_QUANT
|
||||
//#define EXACT_QUANT
|
||||
#ifdef EXACT_QUANT
|
||||
static void vp8cx_invert_quant(int improved_quant, short *quant,
|
||||
short *shift, short d)
|
||||
static void vp8cx_invert_quant(short *quant, short *shift, short d)
|
||||
{
|
||||
if(improved_quant)
|
||||
{
|
||||
unsigned t;
|
||||
int l;
|
||||
t = d;
|
||||
for(l = 0; t > 1; l++)
|
||||
t>>=1;
|
||||
t = 1 + (1<<(16+l))/d;
|
||||
*quant = (short)(t - (1<<16));
|
||||
*shift = l;
|
||||
}
|
||||
else
|
||||
{
|
||||
*quant = (1 << 16) / d;
|
||||
*shift = 0;
|
||||
}
|
||||
unsigned t;
|
||||
int l;
|
||||
t = d;
|
||||
for(l = 0; t > 1; l++)
|
||||
t>>=1;
|
||||
t = 1 + (1<<(16+l))/d;
|
||||
*quant = (short)(t - (1<<16));
|
||||
*shift = l;
|
||||
}
|
||||
|
||||
void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
@@ -179,8 +170,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
{
|
||||
// dc values
|
||||
quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
|
||||
cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
|
||||
vp8cx_invert_quant(cpi->Y1quant[Q] + 0,
|
||||
cpi->Y1quant_shift[Q] + 0, quant_val);
|
||||
cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
||||
cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
|
||||
@@ -188,8 +178,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
|
||||
|
||||
quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
|
||||
cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
|
||||
vp8cx_invert_quant(cpi->Y2quant[Q] + 0,
|
||||
cpi->Y2quant_shift[Q] + 0, quant_val);
|
||||
cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
|
||||
cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
|
||||
@@ -197,8 +186,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
|
||||
|
||||
quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
|
||||
cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
|
||||
vp8cx_invert_quant(cpi->UVquant[Q] + 0,
|
||||
cpi->UVquant_shift[Q] + 0, quant_val);
|
||||
cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
|
||||
cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
|
||||
@@ -211,8 +199,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
int rc = vp8_default_zig_zag1d[i];
|
||||
|
||||
quant_val = vp8_ac_yquant(Q);
|
||||
cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
|
||||
vp8cx_invert_quant(cpi->Y1quant[Q] + rc,
|
||||
cpi->Y1quant_shift[Q] + rc, quant_val);
|
||||
cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
||||
cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
|
||||
@@ -220,8 +207,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
|
||||
|
||||
quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
|
||||
cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
|
||||
vp8cx_invert_quant(cpi->Y2quant[Q] + rc,
|
||||
cpi->Y2quant_shift[Q] + rc, quant_val);
|
||||
cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
|
||||
cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
|
||||
@@ -229,8 +215,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
|
||||
|
||||
quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
|
||||
cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
|
||||
vp8cx_invert_quant(cpi->UVquant[Q] + rc,
|
||||
cpi->UVquant_shift[Q] + rc, quant_val);
|
||||
cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
||||
cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
|
||||
@@ -331,7 +316,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
x->block[i].quant = cpi->Y1quant[QIndex];
|
||||
x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
|
||||
x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
|
||||
x->block[i].zbin = cpi->Y1zbin[QIndex];
|
||||
x->block[i].round = cpi->Y1round[QIndex];
|
||||
@@ -346,7 +330,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
for (i = 16; i < 24; i++)
|
||||
{
|
||||
x->block[i].quant = cpi->UVquant[QIndex];
|
||||
x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
|
||||
x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
|
||||
x->block[i].zbin = cpi->UVzbin[QIndex];
|
||||
x->block[i].round = cpi->UVround[QIndex];
|
||||
@@ -357,7 +340,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
|
||||
// Y2
|
||||
zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
|
||||
x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
|
||||
x->block[24].quant = cpi->Y2quant[QIndex];
|
||||
x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
|
||||
x->block[24].zbin = cpi->Y2zbin[QIndex];
|
||||
@@ -369,9 +351,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
|
||||
void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
|
||||
{
|
||||
// Clear Zbin mode boost for default case
|
||||
cpi->zbin_mode_boost = 0;
|
||||
|
||||
// vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called
|
||||
// when these values are not all zero.
|
||||
if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
|
||||
@@ -384,62 +363,6 @@ void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
|
||||
/* activity_avg must be positive, or flat regions could get a zero weight
|
||||
* (infinite lambda), which confounds analysis.
|
||||
* This also avoids the need for divide by zero checks in
|
||||
* vp8_activity_masking().
|
||||
*/
|
||||
#define VP8_ACTIVITY_AVG_MIN (64)
|
||||
|
||||
/* This is used as a reference when computing the source variance for the
|
||||
* purposes of activity masking.
|
||||
* Eventually this should be replaced by custom no-reference routines,
|
||||
* which will be faster.
|
||||
*/
|
||||
static const unsigned char VP8_VAR_OFFS[16]=
|
||||
{
|
||||
128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
|
||||
};
|
||||
|
||||
unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
{
|
||||
unsigned int act;
|
||||
unsigned int sse;
|
||||
int sum;
|
||||
unsigned int a;
|
||||
unsigned int b;
|
||||
unsigned int d;
|
||||
/* TODO: This could also be done over smaller areas (8x8), but that would
|
||||
* require extensive changes elsewhere, as lambda is assumed to be fixed
|
||||
* over an entire MB in most of the code.
|
||||
* Another option is to compute four 8x8 variances, and pick a single
|
||||
* lambda using a non-linear combination (e.g., the smallest, or second
|
||||
* smallest, etc.).
|
||||
*/
|
||||
VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
|
||||
x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
|
||||
/* This requires a full 32 bits of precision. */
|
||||
act = (sse<<8) - sum*sum;
|
||||
/* Drop 4 to give us some headroom to work with. */
|
||||
act = (act + 8) >> 4;
|
||||
/* If the region is flat, lower the activity some more. */
|
||||
if (act < 8<<12)
|
||||
act = act < 5<<12 ? act : 5<<12;
|
||||
/* TODO: For non-flat regions, edge regions should receive less masking
|
||||
* than textured regions, but identifying edge regions quickly and
|
||||
* reliably enough is still a subject of experimentation.
|
||||
* This will be most noticable near edges with a complex shape (e.g.,
|
||||
* text), but the 4x4 transform size should make this less of a problem
|
||||
* than it would be for an 8x8 transform.
|
||||
*/
|
||||
/* Apply the masking to the RD multiplier. */
|
||||
a = act + 4*cpi->activity_avg;
|
||||
b = 4*act + cpi->activity_avg;
|
||||
x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
|
||||
return act;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static
|
||||
void encode_mb_row(VP8_COMP *cpi,
|
||||
@@ -451,7 +374,6 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
int *segment_counts,
|
||||
int *totalrate)
|
||||
{
|
||||
INT64 activity_sum = 0;
|
||||
int i;
|
||||
int recon_yoffset, recon_uvoffset;
|
||||
int mb_col;
|
||||
@@ -480,14 +402,14 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
// Set up limit values for vertical motion vector components
|
||||
// to prevent them extending beyond the UMV borders
|
||||
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
|
||||
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
|
||||
+ (VP8BORDERINPIXELS - 16);
|
||||
|
||||
// for each macroblock col in image
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
// Distance of Mb to the left & right edges, specified in
|
||||
// 1/8th pel units as they are always compared to values
|
||||
// Distance of Mb to the left & right edges, specified in
|
||||
// 1/8th pel units as they are always compared to values
|
||||
// that are in 1/8th pel units
|
||||
xd->mb_to_left_edge = -((mb_col * 16) << 3);
|
||||
xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
|
||||
@@ -495,7 +417,7 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
// Set up limit values for horizontal motion vector components
|
||||
// to prevent them extending beyond the UMV borders
|
||||
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
|
||||
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
|
||||
+ (VP8BORDERINPIXELS - 16);
|
||||
|
||||
xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
|
||||
@@ -503,12 +425,6 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
|
||||
xd->left_available = (mb_col != 0);
|
||||
|
||||
x->rddiv = cpi->RDDIV;
|
||||
x->rdmult = cpi->RDMULT;
|
||||
|
||||
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
|
||||
activity_sum += vp8_activity_masking(cpi, x);
|
||||
|
||||
// Is segmentation enabled
|
||||
// MB level adjutment to quantizer
|
||||
if (xd->segmentation_enabled)
|
||||
@@ -615,7 +531,6 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
// this is to account for the border
|
||||
xd->mode_info_context++;
|
||||
x->partition_info++;
|
||||
x->activity_sum += activity_sum;
|
||||
}
|
||||
|
||||
|
||||
@@ -732,7 +647,8 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
|
||||
vp8_setup_block_ptrs(x);
|
||||
|
||||
x->activity_sum = 0;
|
||||
x->rddiv = cpi->RDDIV;
|
||||
x->rdmult = cpi->RDMULT;
|
||||
|
||||
#if 0
|
||||
// Experimental rd code
|
||||
@@ -787,12 +703,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
else
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
int i;
|
||||
|
||||
vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
|
||||
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
|
||||
{
|
||||
int i;
|
||||
cpi->current_mb_col_main = -1;
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
@@ -870,11 +785,6 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
totalrate += cpi->mb_row_ei[i].totalrate;
|
||||
}
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
@@ -1010,14 +920,6 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
cpi->last_frame_distortion = cpi->frame_distortion;
|
||||
#endif
|
||||
|
||||
/* Update the average activity for the next frame.
|
||||
* This is feed-forward for now; it could also be saved in two-pass, or
|
||||
* done during lookahead when that is eventually added.
|
||||
*/
|
||||
cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
|
||||
if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
|
||||
cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
|
||||
|
||||
}
|
||||
void vp8_setup_block_ptrs(MACROBLOCK *x)
|
||||
{
|
||||
@@ -1279,18 +1181,7 @@ int vp8cx_encode_inter_macroblock
|
||||
|
||||
if (cpi->sf.RD)
|
||||
{
|
||||
/* Are we using the fast quantizer for the mode selection? */
|
||||
if(cpi->sf.use_fastquant_for_pick)
|
||||
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
|
||||
|
||||
inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
|
||||
|
||||
/* switch back to the regular quantizer for the encode */
|
||||
if (cpi->sf.improved_quant)
|
||||
{
|
||||
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@@ -1323,25 +1214,11 @@ int vp8cx_encode_inter_macroblock
|
||||
// Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
|
||||
if (cpi->zbin_mode_boost_enabled)
|
||||
{
|
||||
if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
|
||||
cpi->zbin_mode_boost = 0;
|
||||
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME))
|
||||
cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
|
||||
else
|
||||
{
|
||||
if (xd->mode_info_context->mbmi.mode == ZEROMV)
|
||||
{
|
||||
if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
|
||||
cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
|
||||
else
|
||||
cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
|
||||
}
|
||||
else if (xd->mode_info_context->mbmi.mode == SPLITMV)
|
||||
cpi->zbin_mode_boost = 0;
|
||||
else
|
||||
cpi->zbin_mode_boost = MV_ZBIN_BOOST;
|
||||
}
|
||||
cpi->zbin_mode_boost = 0;
|
||||
}
|
||||
else
|
||||
cpi->zbin_mode_boost = 0;
|
||||
|
||||
vp8cx_mb_init_quantizer(cpi, x);
|
||||
}
|
||||
|
||||
@@ -105,7 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
#if 1
|
||||
if (x->optimize)
|
||||
if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
|
||||
vp8_optimize_mby(x, rtcd);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -243,9 +243,9 @@ struct vp8_token_state{
|
||||
};
|
||||
|
||||
// TODO: experiments to find optimal multiple numbers
|
||||
#define Y1_RD_MULT 4
|
||||
#define UV_RD_MULT 2
|
||||
#define Y2_RD_MULT 16
|
||||
#define Y1_RD_MULT 1
|
||||
#define UV_RD_MULT 1
|
||||
#define Y2_RD_MULT 4
|
||||
|
||||
static const int plane_rd_mult[4]=
|
||||
{
|
||||
@@ -309,10 +309,8 @@ void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
|
||||
eob = d->eob;
|
||||
|
||||
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
|
||||
rdmult = mb->rdmult * err_mult;
|
||||
if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME)
|
||||
rdmult = (rdmult * 9)>>4;
|
||||
|
||||
/* TODO: These should vary with the block type, since the quantizer does. */
|
||||
rdmult = (mb->rdmult << 2)*err_mult;
|
||||
rddiv = mb->rddiv;
|
||||
best_mask[0] = best_mask[1] = 0;
|
||||
/* Initialize the sentinel node of the trellis. */
|
||||
@@ -635,7 +633,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
vp8_quantize_mb(x);
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
if (x->optimize)
|
||||
if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
|
||||
vp8_optimize_mb(x, rtcd);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -61,7 +61,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
|
||||
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
|
||||
volatile int *last_row_current_mb_col;
|
||||
INT64 activity_sum = 0;
|
||||
|
||||
if (ithread > 0)
|
||||
last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col;
|
||||
@@ -112,12 +111,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
|
||||
xd->left_available = (mb_col != 0);
|
||||
|
||||
x->rddiv = cpi->RDDIV;
|
||||
x->rdmult = cpi->RDMULT;
|
||||
|
||||
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
|
||||
activity_sum += vp8_activity_masking(cpi, x);
|
||||
|
||||
// Is segmentation enabled
|
||||
// MB level adjutment to quantizer
|
||||
if (xd->segmentation_enabled)
|
||||
@@ -133,7 +126,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
else
|
||||
xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default
|
||||
|
||||
x->active_ptr = cpi->active_map + seg_map_index + mb_col;
|
||||
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
{
|
||||
@@ -165,28 +157,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
|
||||
cpi->inter_zz_count ++;
|
||||
|
||||
// Special case code for cyclic refresh
|
||||
// If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
|
||||
// during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
|
||||
if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
|
||||
{
|
||||
cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
|
||||
|
||||
// If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
|
||||
// Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
|
||||
// else mark it as dirty (1).
|
||||
if (xd->mode_info_context->mbmi.segment_id)
|
||||
cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
|
||||
else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
|
||||
{
|
||||
if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
|
||||
cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
|
||||
}
|
||||
else
|
||||
cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
cpi->tplist[mb_row].stop = *tp;
|
||||
|
||||
x->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb
|
||||
@@ -225,7 +197,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
// this is to account for the border
|
||||
xd->mode_info_context++;
|
||||
x->partition_info++;
|
||||
x->activity_sum += activity_sum;
|
||||
|
||||
x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
|
||||
x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
|
||||
@@ -269,6 +240,8 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
|
||||
z->sadperbit16 = x->sadperbit16;
|
||||
z->sadperbit4 = x->sadperbit4;
|
||||
z->errthresh = x->errthresh;
|
||||
z->rddiv = x->rddiv;
|
||||
z->rdmult = x->rdmult;
|
||||
|
||||
/*
|
||||
z->mv_col_min = x->mv_col_min;
|
||||
@@ -282,7 +255,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
|
||||
z->vp8_short_fdct8x4 = x->vp8_short_fdct8x4;
|
||||
z->short_walsh4x4 = x->short_walsh4x4;
|
||||
z->quantize_b = x->quantize_b;
|
||||
z->optimize = x->optimize;
|
||||
|
||||
/*
|
||||
z->mvc = x->mvc;
|
||||
@@ -310,7 +282,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
|
||||
for (i = 0; i < 25; i++)
|
||||
{
|
||||
z->block[i].quant = x->block[i].quant;
|
||||
z->block[i].quant_fast = x->block[i].quant_fast;
|
||||
z->block[i].quant_shift = x->block[i].quant_shift;
|
||||
z->block[i].zbin = x->block[i].zbin;
|
||||
z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
|
||||
@@ -421,7 +392,8 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
|
||||
|
||||
vp8_setup_block_ptrs(mb);
|
||||
|
||||
mb->activity_sum = 0;
|
||||
mb->rddiv = cpi->RDDIV;
|
||||
mb->rdmult = cpi->RDMULT;
|
||||
|
||||
mbd->left_context = &cm->left_context;
|
||||
mb->mvc = cm->fc.mvc;
|
||||
|
||||
@@ -472,7 +472,7 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
|
||||
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
|
||||
|
||||
// Initial step/diamond search centred on best mv
|
||||
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv);
|
||||
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost);
|
||||
if ( tmp_err < INT_MAX-new_mv_mode_penalty )
|
||||
tmp_err += new_mv_mode_penalty;
|
||||
|
||||
@@ -495,7 +495,7 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
|
||||
num00--;
|
||||
else
|
||||
{
|
||||
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv);
|
||||
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost);
|
||||
if ( tmp_err < INT_MAX-new_mv_mode_penalty )
|
||||
tmp_err += new_mv_mode_penalty;
|
||||
|
||||
@@ -1145,7 +1145,6 @@ void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
cpi->output_frame_rate = cpi->oxcf.frame_rate;
|
||||
cpi->bits_left = (long long)(cpi->total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
|
||||
cpi->bits_left -= (long long)(cpi->total_stats->duration * two_pass_min_rate / 10000000.0);
|
||||
cpi->clip_bits_total = cpi->bits_left;
|
||||
|
||||
vp8_avg_stats(cpi->total_stats);
|
||||
|
||||
@@ -1174,25 +1173,17 @@ void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
{
|
||||
start_pos = cpi->stats_in; // Note starting "file" position
|
||||
|
||||
cpi->modified_error_total = 0.0;
|
||||
cpi->modified_error_used = 0.0;
|
||||
cpi->modified_total_error_left = 0.0;
|
||||
|
||||
while (vp8_input_stats(cpi, &this_frame) != EOF)
|
||||
{
|
||||
cpi->modified_error_total += calculate_modified_err(cpi, &this_frame);
|
||||
cpi->modified_total_error_left += calculate_modified_err(cpi, &this_frame);
|
||||
}
|
||||
cpi->modified_error_left = cpi->modified_error_total;
|
||||
|
||||
reset_fpf_position(cpi, start_pos); // Reset file position
|
||||
|
||||
}
|
||||
|
||||
// Calculate the clip target modified bits per error
|
||||
// The observed bpe starts as the same number.
|
||||
cpi->clip_bpe = cpi->bits_left /
|
||||
DOUBLE_DIVIDE_CHECK(cpi->modified_error_total);
|
||||
cpi->observed_bpe = cpi->clip_bpe;
|
||||
|
||||
cpi->fp_motion_map_stats = (unsigned char *)cpi->stats_in;
|
||||
}
|
||||
|
||||
@@ -1448,7 +1439,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
// Boost for arf frame
|
||||
Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100);
|
||||
Boost += (i * 50);
|
||||
Boost += (cpi->baseline_gf_interval * 50);
|
||||
allocation_chunks = (i * 100) + Boost;
|
||||
|
||||
// Normalize Altboost and allocations chunck down to prevent overflow
|
||||
@@ -1594,9 +1585,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
// Reset the file position
|
||||
reset_fpf_position(cpi, start_pos);
|
||||
|
||||
// Update the record of error used so far (only done once per gf group)
|
||||
cpi->modified_error_used += gf_group_err;
|
||||
|
||||
// Assign bits to the arf or gf.
|
||||
{
|
||||
int Boost;
|
||||
@@ -1750,6 +1738,16 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
vp8_avg_stats(&sectionstats);
|
||||
|
||||
if (sectionstats.pcnt_motion < .17)
|
||||
cpi->section_is_low_motion = 1;
|
||||
else
|
||||
cpi->section_is_low_motion = 0;
|
||||
|
||||
if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45)
|
||||
cpi->section_is_fast_motion = 1;
|
||||
else
|
||||
cpi->section_is_fast_motion = 0;
|
||||
|
||||
cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
|
||||
Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
@@ -1894,16 +1892,6 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
// Is this a GF / ARF (Note that a KF is always also a GF)
|
||||
if (cpi->frames_till_gf_update_due == 0)
|
||||
{
|
||||
// Update monitor of the bits per error observed so far.
|
||||
// Done once per gf group based on what has gone before
|
||||
// so do nothing if this is the first frame.
|
||||
if (cpi->common.current_video_frame > 0)
|
||||
{
|
||||
cpi->observed_bpe =
|
||||
(double)(cpi->clip_bits_total - cpi->bits_left) /
|
||||
cpi->modified_error_used;
|
||||
}
|
||||
|
||||
// Define next gf group and assign bits to it
|
||||
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
define_gf_group(cpi, &this_frame_copy);
|
||||
@@ -1992,14 +1980,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
cpi->ni_av_qi = cpi->worst_quality;
|
||||
}
|
||||
}
|
||||
// The last few frames of a clip almost always have to few or too many
|
||||
// bits and for the sake of over exact rate control we dont want to make
|
||||
// radical adjustments to the allowed quantizer range just to use up a
|
||||
// few surplus bits or get beneath the target rate.
|
||||
else if ( (cpi->common.current_video_frame <
|
||||
(((unsigned int)cpi->total_stats->count * 255)>>8)) &&
|
||||
((cpi->common.current_video_frame + cpi->baseline_gf_interval) <
|
||||
(unsigned int)cpi->total_stats->count) )
|
||||
else
|
||||
{
|
||||
if (frames_left < 1)
|
||||
frames_left = 1;
|
||||
@@ -2218,7 +2199,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
}
|
||||
|
||||
// Calculate the number of bits that should be assigned to the kf group.
|
||||
if ((cpi->bits_left > 0) && ((int)cpi->modified_error_left > 0))
|
||||
if ((cpi->bits_left > 0) && ((int)cpi->modified_total_error_left > 0))
|
||||
{
|
||||
// Max for a single normal frame (not key frame)
|
||||
int max_bits = frame_max_bits(cpi);
|
||||
@@ -2230,7 +2211,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
// complexity of the section
|
||||
cpi->kf_group_bits = (long long)( cpi->bits_left *
|
||||
( kf_group_err /
|
||||
cpi->modified_error_left ));
|
||||
cpi->modified_total_error_left ));
|
||||
|
||||
// Clip based on maximum per frame rate defined by the user.
|
||||
max_grp_bits = (long long)max_bits * (long long)cpi->frames_to_key;
|
||||
@@ -2363,7 +2344,17 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
vp8_avg_stats(&sectionstats);
|
||||
|
||||
cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
if (sectionstats.pcnt_motion < .17)
|
||||
cpi->section_is_low_motion = 1;
|
||||
else
|
||||
cpi->section_is_low_motion = 0;
|
||||
|
||||
if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45)
|
||||
cpi->section_is_fast_motion = 1;
|
||||
else
|
||||
cpi->section_is_fast_motion = 0;
|
||||
|
||||
cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
|
||||
Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
// if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) )
|
||||
@@ -2483,7 +2474,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
double alt_kf_grp_bits =
|
||||
((double)cpi->bits_left *
|
||||
(kf_mod_err * (double)cpi->frames_to_key) /
|
||||
DOUBLE_DIVIDE_CHECK(cpi->modified_error_left));
|
||||
DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left));
|
||||
|
||||
alt_kf_bits = (int)((double)kf_boost *
|
||||
(alt_kf_grp_bits / (double)allocation_chunks));
|
||||
@@ -2501,7 +2492,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
alt_kf_bits =
|
||||
(int)((double)cpi->bits_left *
|
||||
(kf_mod_err /
|
||||
DOUBLE_DIVIDE_CHECK(cpi->modified_error_left)));
|
||||
DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left)));
|
||||
|
||||
if (alt_kf_bits > cpi->kf_bits)
|
||||
{
|
||||
@@ -2521,7 +2512,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
// Adjust the count of total modified error left.
|
||||
// The count of bits left is adjusted elsewhere based on real coded frame sizes
|
||||
cpi->modified_error_left -= kf_group_err;
|
||||
cpi->modified_total_error_left -= kf_group_err;
|
||||
|
||||
if (cpi->oxcf.allow_spatial_resampling)
|
||||
{
|
||||
|
||||
@@ -40,12 +40,6 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c;
|
||||
cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c;
|
||||
|
||||
cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c;
|
||||
cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c;
|
||||
cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c;
|
||||
cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c;
|
||||
cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c;
|
||||
|
||||
cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c;
|
||||
cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c;
|
||||
cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c;
|
||||
@@ -94,8 +88,6 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
|
||||
|
||||
cpi->rtcd.search.full_search = vp8_full_search_sad;
|
||||
cpi->rtcd.search.diamond_search = vp8_diamond_search_sad;
|
||||
|
||||
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
|
||||
#endif
|
||||
|
||||
// Pure C:
|
||||
|
||||
@@ -913,8 +913,7 @@ int vp8_diamond_search_sad
|
||||
int *num00,
|
||||
vp8_variance_fn_ptr_t *fn_ptr,
|
||||
int *mvsadcost[2],
|
||||
int *mvcost[2],
|
||||
MV *center_mv
|
||||
int *mvcost[2]
|
||||
)
|
||||
{
|
||||
int i, j, step;
|
||||
@@ -941,8 +940,6 @@ int vp8_diamond_search_sad
|
||||
unsigned char *check_here;
|
||||
int thissad;
|
||||
|
||||
*num00 = 0;
|
||||
|
||||
// Work out the start point for the search
|
||||
in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
|
||||
best_address = in_what;
|
||||
@@ -952,7 +949,7 @@ int vp8_diamond_search_sad
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Check the starting position
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// search_param determines the length of the initial step and hence the number of iterations
|
||||
@@ -964,6 +961,8 @@ int vp8_diamond_search_sad
|
||||
best_mv->row = ref_row;
|
||||
best_mv->col = ref_col;
|
||||
|
||||
*num00 = 0;
|
||||
|
||||
for (step = 0; step < tot_steps ; step++)
|
||||
{
|
||||
for (j = 0 ; j < x->searches_per_step ; j++)
|
||||
@@ -983,7 +982,7 @@ int vp8_diamond_search_sad
|
||||
{
|
||||
this_mv.row = this_row_offset << 3;
|
||||
this_mv.col = this_col_offset << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1014,7 +1013,7 @@ int vp8_diamond_search_sad
|
||||
return INT_MAX;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
}
|
||||
|
||||
int vp8_diamond_search_sadx4
|
||||
@@ -1029,8 +1028,7 @@ int vp8_diamond_search_sadx4
|
||||
int *num00,
|
||||
vp8_variance_fn_ptr_t *fn_ptr,
|
||||
int *mvsadcost[2],
|
||||
int *mvcost[2],
|
||||
MV *center_mv
|
||||
int *mvcost[2]
|
||||
)
|
||||
{
|
||||
int i, j, step;
|
||||
@@ -1057,8 +1055,6 @@ int vp8_diamond_search_sadx4
|
||||
unsigned char *check_here;
|
||||
unsigned int thissad;
|
||||
|
||||
*num00 = 0;
|
||||
|
||||
// Work out the start point for the search
|
||||
in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
|
||||
best_address = in_what;
|
||||
@@ -1068,7 +1064,7 @@ int vp8_diamond_search_sadx4
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Check the starting position
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// search_param determines the length of the initial step and hence the number of iterations
|
||||
@@ -1080,6 +1076,8 @@ int vp8_diamond_search_sadx4
|
||||
best_mv->row = ref_row;
|
||||
best_mv->col = ref_col;
|
||||
|
||||
*num00 = 0;
|
||||
|
||||
for (step = 0; step < tot_steps ; step++)
|
||||
{
|
||||
int all_in = 1, t;
|
||||
@@ -1110,7 +1108,7 @@ int vp8_diamond_search_sadx4
|
||||
{
|
||||
this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
|
||||
this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
|
||||
sad_array[t] += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (sad_array[t] < bestsad)
|
||||
{
|
||||
@@ -1139,7 +1137,7 @@ int vp8_diamond_search_sadx4
|
||||
{
|
||||
this_mv.row = this_row_offset << 3;
|
||||
this_mv.col = this_col_offset << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1170,12 +1168,12 @@ int vp8_diamond_search_sadx4
|
||||
return INT_MAX;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
}
|
||||
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
|
||||
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
|
||||
{
|
||||
unsigned char *what = (*(b->base_src) + b->src);
|
||||
int what_stride = b->src_stride;
|
||||
@@ -1213,7 +1211,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
|
||||
// Baseline value at the centre
|
||||
|
||||
//bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
|
||||
@@ -1241,7 +1239,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
|
||||
this_mv.col = c << 3;
|
||||
//thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
|
||||
//thissad += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
|
||||
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1260,12 +1258,12 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
|
||||
|
||||
if (bestsad < INT_MAX)
|
||||
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
else
|
||||
return INT_MAX;
|
||||
}
|
||||
|
||||
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
|
||||
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
|
||||
{
|
||||
unsigned char *what = (*(b->base_src) + b->src);
|
||||
int what_stride = b->src_stride;
|
||||
@@ -1303,7 +1301,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Baseline value at the centre
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
|
||||
@@ -1325,7 +1323,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
check_here = r * mv_stride + in_what + col_min;
|
||||
c = col_min;
|
||||
|
||||
while ((c + 2) < col_max)
|
||||
while ((c + 3) < col_max)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -1338,7 +1336,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1361,7 +1359,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1383,165 +1381,13 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
|
||||
if (bestsad < INT_MAX)
|
||||
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
else
|
||||
return INT_MAX;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
|
||||
{
|
||||
unsigned char *what = (*(b->base_src) + b->src);
|
||||
int what_stride = b->src_stride;
|
||||
unsigned char *in_what;
|
||||
int in_what_stride = d->pre_stride;
|
||||
int mv_stride = d->pre_stride;
|
||||
unsigned char *bestaddress;
|
||||
MV *best_mv = &d->bmi.mv.as_mv;
|
||||
MV this_mv;
|
||||
int bestsad = INT_MAX;
|
||||
int r, c;
|
||||
|
||||
unsigned char *check_here;
|
||||
unsigned int thissad;
|
||||
|
||||
int ref_row = ref_mv->row >> 3;
|
||||
int ref_col = ref_mv->col >> 3;
|
||||
|
||||
int row_min = ref_row - distance;
|
||||
int row_max = ref_row + distance;
|
||||
int col_min = ref_col - distance;
|
||||
int col_max = ref_col + distance;
|
||||
|
||||
unsigned short sad_array8[8];
|
||||
unsigned int sad_array[3];
|
||||
|
||||
// Work out the mid point for the search
|
||||
in_what = *(d->base_pre) + d->pre;
|
||||
bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
|
||||
|
||||
best_mv->row = ref_row;
|
||||
best_mv->col = ref_col;
|
||||
|
||||
// We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
|
||||
if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Baseline value at the centre
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
|
||||
if (col_min < x->mv_col_min)
|
||||
col_min = x->mv_col_min;
|
||||
|
||||
if (col_max > x->mv_col_max)
|
||||
col_max = x->mv_col_max;
|
||||
|
||||
if (row_min < x->mv_row_min)
|
||||
row_min = x->mv_row_min;
|
||||
|
||||
if (row_max > x->mv_row_max)
|
||||
row_max = x->mv_row_max;
|
||||
|
||||
for (r = row_min; r < row_max ; r++)
|
||||
{
|
||||
this_mv.row = r << 3;
|
||||
check_here = r * mv_stride + in_what + col_min;
|
||||
c = col_min;
|
||||
|
||||
while ((c + 7) < col_max)
|
||||
{
|
||||
int i;
|
||||
|
||||
fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
thissad = (unsigned int)sad_array8[i];
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
bestsad = thissad;
|
||||
best_mv->row = r;
|
||||
best_mv->col = c;
|
||||
bestaddress = check_here;
|
||||
}
|
||||
}
|
||||
|
||||
check_here++;
|
||||
c++;
|
||||
}
|
||||
}
|
||||
|
||||
while ((c + 2) < col_max)
|
||||
{
|
||||
int i;
|
||||
|
||||
fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
thissad = sad_array[i];
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
bestsad = thissad;
|
||||
best_mv->row = r;
|
||||
best_mv->col = c;
|
||||
bestaddress = check_here;
|
||||
}
|
||||
}
|
||||
|
||||
check_here++;
|
||||
c++;
|
||||
}
|
||||
}
|
||||
|
||||
while (c < col_max)
|
||||
{
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
bestsad = thissad;
|
||||
best_mv->row = r;
|
||||
best_mv->col = c;
|
||||
bestaddress = check_here;
|
||||
}
|
||||
}
|
||||
|
||||
check_here ++;
|
||||
c ++;
|
||||
}
|
||||
}
|
||||
|
||||
this_mv.row = best_mv->row << 3;
|
||||
this_mv.col = best_mv->col << 3;
|
||||
|
||||
if (bestsad < INT_MAX)
|
||||
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
else
|
||||
return INT_MAX;
|
||||
}
|
||||
|
||||
#ifdef ENTROPY_STATS
|
||||
void print_mode_context(void)
|
||||
{
|
||||
|
||||
@@ -24,7 +24,7 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
|
||||
#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step
|
||||
#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS+3)) - 8) // Max full pel mv specified in 1/8 pel units
|
||||
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units
|
||||
#define MAX_POSSIBLE_MV (1 << 11) // Maximum MV in 1/8 pel units
|
||||
|
||||
|
||||
extern void print_mode_context(void);
|
||||
extern int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight);
|
||||
@@ -67,8 +67,7 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
|
||||
int distance, \
|
||||
vp8_variance_fn_ptr_t *fn_ptr, \
|
||||
int *mvcost[2], \
|
||||
int *mvsadcost[2], \
|
||||
MV *center_mv \
|
||||
int *mvsadcost[2] \
|
||||
)
|
||||
|
||||
#define prototype_diamond_search_sad(sym)\
|
||||
@@ -84,8 +83,7 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
|
||||
int *num00, \
|
||||
vp8_variance_fn_ptr_t *fn_ptr, \
|
||||
int *mvsadcost[2], \
|
||||
int *mvcost[2], \
|
||||
MV *center_mv \
|
||||
int *mvcost[2] \
|
||||
)
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
@@ -95,7 +93,6 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
|
||||
typedef prototype_full_search_sad(*vp8_full_search_fn_t);
|
||||
extern prototype_full_search_sad(vp8_full_search_sad);
|
||||
extern prototype_full_search_sad(vp8_full_search_sadx3);
|
||||
extern prototype_full_search_sad(vp8_full_search_sadx8);
|
||||
|
||||
typedef prototype_diamond_search_sad(*vp8_diamond_search_fn_t);
|
||||
extern prototype_diamond_search_sad(vp8_diamond_search_sad);
|
||||
|
||||
@@ -73,7 +73,6 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi);
|
||||
int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
|
||||
int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
|
||||
|
||||
extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi);
|
||||
|
||||
static void set_default_lf_deltas(VP8_COMP *cpi);
|
||||
|
||||
@@ -175,6 +174,17 @@ static const int kf_high_motion_minq[QINDEX_RANGE] =
|
||||
27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34,
|
||||
35,35,36,36,37,38,39,40,41,42,43,44,45,46,47,48,
|
||||
};
|
||||
/*static const int kf_minq[QINDEX_RANGE] =
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6,
|
||||
7, 7, 8, 8, 9, 9, 10,10,11,11,12,12,13,13,14,14,
|
||||
15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22,
|
||||
23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30,
|
||||
31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,38
|
||||
};*/
|
||||
static const int gf_low_motion_minq[QINDEX_RANGE] =
|
||||
{
|
||||
0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,
|
||||
@@ -208,16 +218,27 @@ static const int gf_high_motion_minq[QINDEX_RANGE] =
|
||||
41,41,42,42,43,44,45,46,47,48,49,50,51,52,53,54,
|
||||
55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80,
|
||||
};
|
||||
/*static const int gf_arf_minq[QINDEX_RANGE] =
|
||||
{
|
||||
0,0,0,0,1,1,1,1,1,1,2,2,3,3,3,4,
|
||||
4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,
|
||||
9,10,10,10,11,11,11,12,12,12,13,13,13,14,14,14,
|
||||
15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22,
|
||||
23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30,
|
||||
31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,39,
|
||||
39,40,40,41,41,42,42,43,43,44,45,46,47,48,49,50,
|
||||
51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66
|
||||
};*/
|
||||
static const int inter_minq[QINDEX_RANGE] =
|
||||
{
|
||||
0,0,1,1,2,3,3,4,4,5,6,6,7,8,8,9,
|
||||
9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,20,
|
||||
20,21,22,22,23,24,24,25,26,27,27,28,29,30,30,31,
|
||||
32,33,33,34,35,36,36,37,38,39,39,40,41,42,42,43,
|
||||
44,45,46,46,47,48,49,50,50,51,52,53,54,55,55,56,
|
||||
57,58,59,60,60,61,62,63,64,65,66,67,67,68,69,70,
|
||||
71,72,73,74,75,75,76,77,78,79,80,81,82,83,84,85,
|
||||
86,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
|
||||
0,0,0,0,1,1,2,3,3,4,4,5,6,6,7,7,
|
||||
8,8,9,9,10,11,11,12,12,13,13,14,14,15,15,16,
|
||||
16,17,17,17,18,18,19,19,20,20,21,21,22,22,22,23,
|
||||
23,24,24,24,25,25,26,27,28,28,29,30,31,32,33,34,
|
||||
35,35,36,37,38,39,39,40,41,42,43,43,44,45,46,47,
|
||||
47,48,49,49,51,52,53,54,54,55,56,56,57,57,58,58,
|
||||
59,59,60,61,61,62,62,63,64,64,65,66,67,67,68,69,
|
||||
69,70,71,71,72,73,74,75,76,76,77,78,79,80,81,81,
|
||||
};
|
||||
|
||||
void vp8_initialize()
|
||||
@@ -262,21 +283,6 @@ static void setup_features(VP8_COMP *cpi)
|
||||
|
||||
void vp8_dealloc_compressor_data(VP8_COMP *cpi)
|
||||
{
|
||||
// Delete last frame MV storage buffers
|
||||
if (cpi->lfmv != 0)
|
||||
vpx_free(cpi->lfmv);
|
||||
|
||||
cpi->lfmv = 0;
|
||||
|
||||
if (cpi->lf_ref_frame_sign_bias != 0)
|
||||
vpx_free(cpi->lf_ref_frame_sign_bias);
|
||||
|
||||
cpi->lf_ref_frame_sign_bias = 0;
|
||||
|
||||
if (cpi->lf_ref_frame != 0)
|
||||
vpx_free(cpi->lf_ref_frame);
|
||||
|
||||
cpi->lf_ref_frame = 0;
|
||||
|
||||
// Delete sementation map
|
||||
if (cpi->segmentation_map != 0)
|
||||
@@ -325,15 +331,8 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
|
||||
|
||||
cpi->mb.pip = 0;
|
||||
|
||||
if(cpi->total_stats)
|
||||
vpx_free(cpi->total_stats);
|
||||
|
||||
cpi->total_stats = 0;
|
||||
|
||||
if(cpi->this_frame_stats)
|
||||
vpx_free(cpi->this_frame_stats);
|
||||
|
||||
cpi->this_frame_stats = 0;
|
||||
vpx_free(cpi->total_stats);
|
||||
vpx_free(cpi->this_frame_stats);
|
||||
}
|
||||
|
||||
static void enable_segmentation(VP8_PTR ptr)
|
||||
@@ -564,7 +563,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
int Speed = cpi->Speed;
|
||||
int i;
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
int last_improved_quant = sf->improved_quant;
|
||||
|
||||
// Initialise default mode frequency sampling variables
|
||||
for (i = 0; i < MAX_MODES; i ++)
|
||||
@@ -591,7 +589,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->max_fs_radius = 32;
|
||||
sf->iterative_sub_pixel = 1;
|
||||
sf->optimize_coefficients = 1;
|
||||
sf->use_fastquant_for_pick = 0;
|
||||
|
||||
sf->first_step = 0;
|
||||
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
|
||||
@@ -685,32 +682,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->thresh_mult[THR_NEARG ] = 1000;
|
||||
sf->thresh_mult[THR_NEARA ] = 1000;
|
||||
|
||||
#if 1
|
||||
sf->thresh_mult[THR_ZEROMV ] = 0;
|
||||
sf->thresh_mult[THR_ZEROG ] = 0;
|
||||
sf->thresh_mult[THR_ZEROA ] = 0;
|
||||
sf->thresh_mult[THR_NEARESTMV] = 0;
|
||||
sf->thresh_mult[THR_NEARESTG ] = 0;
|
||||
sf->thresh_mult[THR_NEARESTA ] = 0;
|
||||
sf->thresh_mult[THR_NEARMV ] = 0;
|
||||
sf->thresh_mult[THR_NEARG ] = 0;
|
||||
sf->thresh_mult[THR_NEARA ] = 0;
|
||||
|
||||
// sf->thresh_mult[THR_DC ] = 0;
|
||||
|
||||
// sf->thresh_mult[THR_V_PRED ] = 1000;
|
||||
// sf->thresh_mult[THR_H_PRED ] = 1000;
|
||||
// sf->thresh_mult[THR_B_PRED ] = 2000;
|
||||
// sf->thresh_mult[THR_TM ] = 1000;
|
||||
|
||||
sf->thresh_mult[THR_NEWMV ] = 1000;
|
||||
sf->thresh_mult[THR_NEWG ] = 1000;
|
||||
sf->thresh_mult[THR_NEWA ] = 1000;
|
||||
|
||||
sf->thresh_mult[THR_SPLITMV ] = 1700;
|
||||
sf->thresh_mult[THR_SPLITG ] = 4500;
|
||||
sf->thresh_mult[THR_SPLITA ] = 4500;
|
||||
#else
|
||||
sf->thresh_mult[THR_NEWMV ] = 1500;
|
||||
sf->thresh_mult[THR_NEWG ] = 1500;
|
||||
sf->thresh_mult[THR_NEWA ] = 1500;
|
||||
@@ -718,7 +689,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->thresh_mult[THR_SPLITMV ] = 5000;
|
||||
sf->thresh_mult[THR_SPLITG ] = 10000;
|
||||
sf->thresh_mult[THR_SPLITA ] = 10000;
|
||||
#endif
|
||||
|
||||
sf->full_freq[0] = 15;
|
||||
sf->full_freq[1] = 31;
|
||||
|
||||
@@ -790,7 +761,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->thresh_mult[THR_SPLITA ] = 20000;
|
||||
}
|
||||
|
||||
sf->use_fastquant_for_pick = 1;
|
||||
sf->improved_quant = 0;
|
||||
sf->improved_dct = 0;
|
||||
|
||||
sf->first_step = 1;
|
||||
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
|
||||
@@ -798,8 +770,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
|
||||
if (Speed > 1)
|
||||
{
|
||||
sf->use_fastquant_for_pick = 0;
|
||||
|
||||
cpi->mode_check_freq[THR_SPLITG] = 15;
|
||||
cpi->mode_check_freq[THR_SPLITA] = 15;
|
||||
cpi->mode_check_freq[THR_SPLITMV] = 7;
|
||||
@@ -833,13 +803,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->thresh_mult[THR_SPLITA ] = 50000;
|
||||
}
|
||||
|
||||
sf->first_step = 1;
|
||||
|
||||
sf->improved_quant = 0;
|
||||
sf->improved_dct = 0;
|
||||
|
||||
// Only do recode loop on key frames, golden frames and
|
||||
// alt ref frames
|
||||
// Only do recode loop on key frames and golden frames
|
||||
sf->recode_loop = 2;
|
||||
|
||||
sf->full_freq[0] = 31;
|
||||
@@ -1298,8 +1262,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
{
|
||||
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
|
||||
}
|
||||
if (cpi->sf.improved_quant != last_improved_quant)
|
||||
vp8cx_init_quantizer(cpi);
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
cpi->mb.e_mbd.rtcd = &cpi->common.rtcd;
|
||||
@@ -1367,9 +1329,6 @@ static void alloc_raw_frame_buffers(VP8_COMP *cpi)
|
||||
|
||||
static int vp8_alloc_partition_data(VP8_COMP *cpi)
|
||||
{
|
||||
if(cpi->mb.pip)
|
||||
vpx_free(cpi->mb.pip);
|
||||
|
||||
cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) *
|
||||
(cpi->common.mb_rows + 1),
|
||||
sizeof(PARTITION_INFO));
|
||||
@@ -1437,16 +1396,8 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
|
||||
|
||||
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
|
||||
|
||||
if(cpi->total_stats)
|
||||
vpx_free(cpi->total_stats);
|
||||
|
||||
cpi->total_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
|
||||
|
||||
if(cpi->this_frame_stats)
|
||||
vpx_free(cpi->this_frame_stats);
|
||||
|
||||
cpi->this_frame_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
|
||||
|
||||
if(!cpi->total_stats || !cpi->this_frame_stats)
|
||||
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate firstpass stats");
|
||||
@@ -2194,10 +2145,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
cpi->alt_is_last = 0 ;
|
||||
cpi->gold_is_alt = 0 ;
|
||||
|
||||
// allocate memory for storing last frame's MVs for MV prediction.
|
||||
CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int_mv)));
|
||||
CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int)));
|
||||
CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int)));
|
||||
|
||||
|
||||
// Create the encoder segmentation map and set all entries to 0
|
||||
CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
|
||||
@@ -2253,8 +2201,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
init_context_counters();
|
||||
#endif
|
||||
|
||||
/*Initialize the feed-forward activity masking.*/
|
||||
cpi->activity_avg = 90<<12;
|
||||
|
||||
cpi->frames_since_key = 8; // Give a sensible default for the first frame.
|
||||
cpi->key_frame_frequency = cpi->oxcf.key_freq;
|
||||
@@ -2395,7 +2341,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);
|
||||
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);
|
||||
cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
|
||||
cpi->fn_ptr[BLOCK_16X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8);
|
||||
cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
|
||||
|
||||
cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
|
||||
@@ -2405,7 +2350,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
|
||||
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
|
||||
cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
|
||||
cpi->fn_ptr[BLOCK_16X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8);
|
||||
cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
|
||||
|
||||
cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
|
||||
@@ -2415,7 +2359,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
|
||||
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
|
||||
cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
|
||||
cpi->fn_ptr[BLOCK_8X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8);
|
||||
cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
|
||||
|
||||
cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
|
||||
@@ -2425,7 +2368,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
|
||||
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
|
||||
cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
|
||||
cpi->fn_ptr[BLOCK_8X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8);
|
||||
cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
|
||||
|
||||
cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
|
||||
@@ -2435,7 +2377,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
|
||||
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
|
||||
cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
|
||||
cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8);
|
||||
cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
@@ -3486,37 +3427,6 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
|
||||
#endif
|
||||
// return of 0 means drop frame
|
||||
|
||||
// Function to test for conditions that indeicate we should loop
|
||||
// back and recode a frame.
|
||||
static BOOL recode_loop_test( VP8_COMP *cpi,
|
||||
int high_limit, int low_limit,
|
||||
int q, int maxq, int minq )
|
||||
{
|
||||
BOOL force_recode = FALSE;
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
|
||||
// Is frame recode allowed at all
|
||||
// Yes if either recode mode 1 is selected or mode two is selcted
|
||||
// and the frame is a key frame. golden frame or alt_ref_frame
|
||||
if ( (cpi->sf.recode_loop == 1) ||
|
||||
( (cpi->sf.recode_loop == 2) &&
|
||||
( (cm->frame_type == KEY_FRAME) ||
|
||||
cm->refresh_golden_frame ||
|
||||
cm->refresh_alt_ref_frame ) ) )
|
||||
{
|
||||
// General over and under shoot tests
|
||||
if ( ((cpi->projected_frame_size > high_limit) && (q < maxq)) ||
|
||||
((cpi->projected_frame_size < low_limit) && (q > minq)) )
|
||||
{
|
||||
force_recode = TRUE;
|
||||
}
|
||||
// Specific rate control mode related tests
|
||||
// TBD
|
||||
}
|
||||
|
||||
return force_recode;
|
||||
}
|
||||
|
||||
static void encode_frame_to_data_rate
|
||||
(
|
||||
VP8_COMP *cpi,
|
||||
@@ -3579,18 +3489,8 @@ static void encode_frame_to_data_rate
|
||||
cpi->zbin_over_quant = 0;
|
||||
cpi->zbin_mode_boost = 0;
|
||||
|
||||
// Enable or disable mode based tweaking of the zbin
|
||||
// For 2 Pass Only used where GF/ARF prediction quality
|
||||
// is above a threshold
|
||||
cpi->zbin_mode_boost = 0;
|
||||
// Enable mode based tweaking of the zbin
|
||||
cpi->zbin_mode_boost_enabled = TRUE;
|
||||
if (cpi->pass == 2)
|
||||
{
|
||||
if ( cpi->gfu_boost <= 400 )
|
||||
{
|
||||
cpi->zbin_mode_boost_enabled = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
// Current default encoder behaviour for the altref sign bias
|
||||
if (cpi->source_alt_ref_active)
|
||||
@@ -3871,16 +3771,17 @@ static void encode_frame_to_data_rate
|
||||
|
||||
vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit);
|
||||
|
||||
// Limit Q range for the adaptive loop.
|
||||
// Limit Q range for the adaptive loop (Values not clipped to range 20-60 as in VP8).
|
||||
bottom_index = cpi->active_best_quality;
|
||||
top_index = cpi->active_worst_quality;
|
||||
q_low = cpi->active_best_quality;
|
||||
q_high = cpi->active_worst_quality;
|
||||
|
||||
vp8_save_coding_context(cpi);
|
||||
|
||||
loop_count = 0;
|
||||
|
||||
q_low = cpi->best_quality;
|
||||
q_high = cpi->worst_quality;
|
||||
|
||||
|
||||
scale_and_extend_source(cpi->un_scaled_source, cpi);
|
||||
#if !(CONFIG_REALTIME_ONLY) && CONFIG_POSTPROC
|
||||
@@ -3916,6 +3817,7 @@ static void encode_frame_to_data_rate
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
{
|
||||
vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0, RTCD(postproc));
|
||||
cpi->ppi.frame = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -3927,6 +3829,10 @@ static void encode_frame_to_data_rate
|
||||
{
|
||||
src += cpi->Source->y_stride * (cpi->Source->y_height - 1);
|
||||
}
|
||||
|
||||
//temp_filter(&cpi->ppi,src,src,
|
||||
// cm->last_frame.y_width * cm->last_frame.y_height,
|
||||
// cpi->oxcf.noise_sensitivity);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4057,13 +3963,15 @@ static void encode_frame_to_data_rate
|
||||
|
||||
Q = vp8_regulate_q(cpi, cpi->this_frame_target);
|
||||
|
||||
q_low = cpi->best_quality;
|
||||
q_high = cpi->worst_quality;
|
||||
|
||||
vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit);
|
||||
|
||||
// Limit Q range for the adaptive loop.
|
||||
// Limit Q range for the adaptive loop (Values not clipped to range 20-60 as in VP8).
|
||||
bottom_index = cpi->active_best_quality;
|
||||
top_index = cpi->active_worst_quality;
|
||||
q_low = cpi->active_best_quality;
|
||||
q_high = cpi->active_worst_quality;
|
||||
|
||||
|
||||
loop_count++;
|
||||
Loop = TRUE;
|
||||
@@ -4103,18 +4011,19 @@ static void encode_frame_to_data_rate
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
|
||||
// Is the projected frame size out of range and are we allowed to attempt to recode.
|
||||
if ( recode_loop_test( cpi,
|
||||
frame_over_shoot_limit, frame_under_shoot_limit,
|
||||
Q, top_index, bottom_index ) )
|
||||
if (((cpi->sf.recode_loop == 1) ||
|
||||
((cpi->sf.recode_loop == 2) && (cm->refresh_golden_frame || (cm->frame_type == KEY_FRAME)))) &&
|
||||
(((cpi->projected_frame_size > frame_over_shoot_limit) && (Q < top_index)) ||
|
||||
//((cpi->projected_frame_size > frame_over_shoot_limit ) && (Q == top_index) && (cpi->zbin_over_quant < ZBIN_OQ_MAX)) ||
|
||||
((cpi->projected_frame_size < frame_under_shoot_limit) && (Q > bottom_index)))
|
||||
)
|
||||
{
|
||||
int last_q = Q;
|
||||
int Retries = 0;
|
||||
|
||||
// Frame size out of permitted range:
|
||||
// Update correction factor & compute new Q to try...
|
||||
|
||||
// Frame is too large
|
||||
if (cpi->projected_frame_size > cpi->this_frame_target)
|
||||
if (cpi->projected_frame_size > frame_over_shoot_limit)
|
||||
{
|
||||
//if ( cpi->zbin_over_quant == 0 )
|
||||
q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value
|
||||
@@ -4158,7 +4067,6 @@ static void encode_frame_to_data_rate
|
||||
|
||||
overshoot_seen = TRUE;
|
||||
}
|
||||
// Frame is too small
|
||||
else
|
||||
{
|
||||
if (cpi->zbin_over_quant == 0)
|
||||
@@ -4252,36 +4160,6 @@ static void encode_frame_to_data_rate
|
||||
}
|
||||
#endif
|
||||
|
||||
// Update the GF useage maps.
|
||||
// This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
|
||||
vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
|
||||
|
||||
// This frame's MVs are saved and will be used in next frame's MV prediction.
|
||||
if(cm->show_frame) //do not save for altref frame
|
||||
{
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
MODE_INFO *tmp = cm->mip; //point to beginning of allocated MODE_INFO arrays.
|
||||
//static int last_video_frame = 0;
|
||||
|
||||
if(cm->frame_type != KEY_FRAME)
|
||||
{
|
||||
for (mb_row = 0; mb_row < cm->mb_rows+1; mb_row ++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols+1; mb_col ++)
|
||||
{
|
||||
if(tmp->mbmi.ref_frame != INTRA_FRAME)
|
||||
cpi->lfmv[mb_col + mb_row*(cm->mode_info_stride)].as_int = tmp->mbmi.mv.as_int;
|
||||
|
||||
cpi->lf_ref_frame_sign_bias[mb_col + mb_row*(cm->mode_info_stride)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame];
|
||||
cpi->lf_ref_frame[mb_col + mb_row*(cm->mode_info_stride)] = tmp->mbmi.ref_frame;
|
||||
tmp++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Update the GF useage maps.
|
||||
// This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
|
||||
vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
|
||||
@@ -4340,11 +4218,10 @@ static void encode_frame_to_data_rate
|
||||
{
|
||||
vp8cx_set_alt_lf_level(cpi, cm->filter_level);
|
||||
vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level);
|
||||
cm->last_frame_type = cm->frame_type;
|
||||
cm->last_filter_type = cm->filter_type;
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
}
|
||||
/* Move storing frame_type out of the above loop since it is also needed in motion search besides loopfilter */
|
||||
cm->last_frame_type = cm->frame_type;
|
||||
|
||||
vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
|
||||
|
||||
@@ -4656,7 +4533,7 @@ static void encode_frame_to_data_rate
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cpi->oxcf.play_alternate && cpi->common.refresh_alt_ref_frame && (cpi->common.frame_type != KEY_FRAME))
|
||||
if (cpi->oxcf.play_alternate && cpi->common.refresh_alt_ref_frame)
|
||||
// Update the alternate reference frame and stats as appropriate.
|
||||
update_alt_ref_frame_and_stats(cpi);
|
||||
else
|
||||
@@ -4979,7 +4856,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
{
|
||||
int thiserr;
|
||||
cpi->oxcf.arnr_strength = i;
|
||||
vp8_temporal_filter_prepare_c(cpi);
|
||||
vp8cx_temp_filter_c(cpi);
|
||||
|
||||
thiserr = vp8_calc_low_ss_err(&cpi->alt_ref_buffer.source_buffer,
|
||||
&cpi->src_buffer[start_frame].source_buffer, IF_RTCD(&cpi->rtcd.variance));
|
||||
@@ -4994,7 +4871,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
if (besti != -1)
|
||||
{
|
||||
cpi->oxcf.arnr_strength = besti;
|
||||
vp8_temporal_filter_prepare_c(cpi);
|
||||
vp8cx_temp_filter_c(cpi);
|
||||
s = &cpi->alt_ref_buffer;
|
||||
|
||||
// FWG not sure if I need to copy this data for the Alt Ref frame
|
||||
@@ -5006,7 +4883,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
s = &cpi->src_buffer[cpi->last_alt_ref_sei];
|
||||
|
||||
#else
|
||||
vp8_temporal_filter_prepare_c(cpi);
|
||||
vp8cx_temp_filter_c(cpi);
|
||||
s = &cpi->alt_ref_buffer;
|
||||
|
||||
// FWG not sure if I need to copy this data for the Alt Ref frame
|
||||
@@ -5090,16 +4967,17 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
|
||||
*frame_flags = cpi->source_frame_flags;
|
||||
|
||||
#if CONFIG_PSNR
|
||||
|
||||
if (cpi->source_time_stamp < cpi->first_time_stamp_ever)
|
||||
{
|
||||
cpi->first_time_stamp_ever = cpi->source_time_stamp;
|
||||
cpi->last_end_time_stamp_seen = cpi->source_time_stamp;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// adjust frame rates based on timestamps given
|
||||
if (!cm->refresh_alt_ref_frame)
|
||||
{
|
||||
if (cpi->source_time_stamp == cpi->first_time_stamp_ever)
|
||||
if (cpi->last_time_stamp_seen == 0)
|
||||
{
|
||||
double this_fps = 10000000.000 / (cpi->source_end_time_stamp - cpi->source_time_stamp);
|
||||
|
||||
@@ -5107,8 +4985,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
}
|
||||
else
|
||||
{
|
||||
long long nanosecs = cpi->source_end_time_stamp
|
||||
- cpi->last_end_time_stamp_seen;
|
||||
long long nanosecs = cpi->source_time_stamp - cpi->last_time_stamp_seen;
|
||||
double this_fps = 10000000.000 / nanosecs;
|
||||
|
||||
vp8_new_frame_rate(cpi, (7 * cpi->oxcf.frame_rate + this_fps) / 8);
|
||||
@@ -5116,7 +4993,6 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
}
|
||||
|
||||
cpi->last_time_stamp_seen = cpi->source_time_stamp;
|
||||
cpi->last_end_time_stamp_seen = cpi->source_end_time_stamp;
|
||||
}
|
||||
|
||||
if (cpi->compressor_speed == 2)
|
||||
@@ -5332,7 +5208,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags)
|
||||
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
|
||||
{
|
||||
VP8_COMP *cpi = (VP8_COMP *) comp;
|
||||
|
||||
@@ -5342,7 +5218,7 @@ int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflag
|
||||
{
|
||||
int ret;
|
||||
#if CONFIG_POSTPROC
|
||||
ret = vp8_post_proc_frame(&cpi->common, dest, flags);
|
||||
ret = vp8_post_proc_frame(&cpi->common, dest, deblock_level, noise_level, flags);
|
||||
#else
|
||||
|
||||
if (cpi->common.frame_to_show)
|
||||
@@ -5435,12 +5311,12 @@ int vp8_set_internal_size(VP8_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert
|
||||
{
|
||||
VP8_COMP *cpi = (VP8_COMP *) comp;
|
||||
|
||||
if (horiz_mode <= ONETWO)
|
||||
if (horiz_mode >= NORMAL && horiz_mode <= ONETWO)
|
||||
cpi->common.horiz_scale = horiz_mode;
|
||||
else
|
||||
return -1;
|
||||
|
||||
if (vert_mode <= ONETWO)
|
||||
if (vert_mode >= NORMAL && vert_mode <= ONETWO)
|
||||
cpi->common.vert_scale = vert_mode;
|
||||
else
|
||||
return -1;
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "treewriter.h"
|
||||
#include "tokenize.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "preproc.h"
|
||||
#include "variance.h"
|
||||
#include "dct.h"
|
||||
#include "encodemb.h"
|
||||
@@ -27,7 +28,6 @@
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "mcomp.h"
|
||||
#include "temporal_filter.h"
|
||||
|
||||
//#define SPEEDSTATS 1
|
||||
#define MIN_GF_INTERVAL 4
|
||||
@@ -46,8 +46,6 @@
|
||||
#define MAX_THRESHMULT 512
|
||||
|
||||
#define GF_ZEROMV_ZBIN_BOOST 24
|
||||
#define LF_ZEROMV_ZBIN_BOOST 12
|
||||
#define MV_ZBIN_BOOST 4
|
||||
#define ZBIN_OQ_MAX 192
|
||||
|
||||
#define VP8_TEMPORAL_ALT_REF 1
|
||||
@@ -182,8 +180,6 @@ typedef struct
|
||||
int first_step;
|
||||
int optimize_coefficients;
|
||||
|
||||
int use_fastquant_for_pick;
|
||||
|
||||
} SPEED_FEATURES;
|
||||
|
||||
typedef struct
|
||||
@@ -231,7 +227,6 @@ typedef struct VP8_ENCODER_RTCD
|
||||
vp8_encodemb_rtcd_vtable_t encodemb;
|
||||
vp8_quantize_rtcd_vtable_t quantize;
|
||||
vp8_search_rtcd_vtable_t search;
|
||||
vp8_temporal_rtcd_vtable_t temporal;
|
||||
} VP8_ENCODER_RTCD;
|
||||
|
||||
enum
|
||||
@@ -244,12 +239,6 @@ enum
|
||||
BLOCK_MAX_SEGMENTS
|
||||
};
|
||||
|
||||
/* Motion vector held in a union so the same storage can be read either as a
 * single unsigned integer (as_int) or through its MV fields (as_mv).
 * NOTE(review): the integer view only covers the whole vector if MV is no
 * larger than unsigned int - confirm against the MV definition. */
typedef union
|
||||
{
|
||||
unsigned int as_int;
|
||||
MV as_mv;
|
||||
} int_mv; /* facilitates rapid equality tests */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
||||
@@ -271,9 +260,6 @@ typedef struct
|
||||
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, Y1quant_fast[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, Y2quant_fast[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, UVquant_fast[QINDEX_RANGE][16]);
|
||||
|
||||
|
||||
MACROBLOCK mb;
|
||||
@@ -290,14 +276,14 @@ typedef struct
|
||||
unsigned int source_frame_flags;
|
||||
YV12_BUFFER_CONFIG scaled_source;
|
||||
|
||||
int source_buffer_count; // number of src_buffers in use for lagged encoding
|
||||
int source_encode_index; // index of buffer in src_buffer to encode
|
||||
int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref
|
||||
int source_alt_ref_active; // an alt ref frame has been encoded and is usable
|
||||
int source_buffer_count;
|
||||
int source_encode_index;
|
||||
int source_alt_ref_pending;
|
||||
int source_alt_ref_active;
|
||||
|
||||
int last_alt_ref_sei; // index into src_buffers of frame used as alt reference
|
||||
int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame
|
||||
int is_next_src_alt_ref; // source of next frame to encode is an exact copy of an alt ref frame
|
||||
int last_alt_ref_sei;
|
||||
int is_src_frame_alt_ref;
|
||||
int is_next_src_alt_ref;
|
||||
|
||||
int gold_is_last; // golden frame same as last frame ( short circuit gold searches)
|
||||
int alt_is_last; // Alt reference frame same as last ( short circuit altref search)
|
||||
@@ -333,7 +319,6 @@ typedef struct
|
||||
int mvcostmultiplier;
|
||||
int subseqblockweight;
|
||||
int errthresh;
|
||||
unsigned int activity_avg;
|
||||
|
||||
int RDMULT;
|
||||
int RDDIV ;
|
||||
@@ -414,7 +399,6 @@ typedef struct
|
||||
int inter_frame_target;
|
||||
double output_frame_rate;
|
||||
long long last_time_stamp_seen;
|
||||
long long last_end_time_stamp_seen;
|
||||
long long first_time_stamp_ever;
|
||||
|
||||
int ni_av_qi;
|
||||
@@ -470,6 +454,8 @@ typedef struct
|
||||
unsigned char *output_partition2;
|
||||
size_t output_partition2size;
|
||||
|
||||
pre_proc_instance ppi;
|
||||
|
||||
int frames_to_key;
|
||||
int gfu_boost;
|
||||
int kf_boost;
|
||||
@@ -480,17 +466,11 @@ typedef struct
|
||||
double start_tot_err_left;
|
||||
double min_error;
|
||||
|
||||
double modified_error_total;
|
||||
double modified_error_used;
|
||||
double modified_error_left;
|
||||
double clip_bpe;
|
||||
double observed_bpe;
|
||||
|
||||
double modified_total_error_left;
|
||||
double avg_iiratio;
|
||||
|
||||
int target_bandwidth;
|
||||
long long bits_left;
|
||||
long long clip_bits_total;
|
||||
FIRSTPASS_STATS *total_stats;
|
||||
FIRSTPASS_STATS *this_frame_stats;
|
||||
FIRSTPASS_STATS *stats_in, *stats_in_end;
|
||||
@@ -631,6 +611,9 @@ typedef struct
|
||||
unsigned int tempdata2;
|
||||
|
||||
int base_skip_false_prob[128];
|
||||
unsigned int section_is_low_motion;
|
||||
unsigned int section_benefits_from_aggresive_q;
|
||||
unsigned int section_is_fast_motion;
|
||||
unsigned int section_intra_rating;
|
||||
|
||||
double section_max_qfactor;
|
||||
@@ -678,10 +661,6 @@ typedef struct
|
||||
unsigned char *gf_active_flags; // Record of which MBs still refer to last golden frame either directly or through 0,0
|
||||
int gf_active_count;
|
||||
|
||||
//Store last frame's MV info for next frame MV prediction
|
||||
int_mv *lfmv;
|
||||
int *lf_ref_frame_sign_bias;
|
||||
int *lf_ref_frame;
|
||||
|
||||
} VP8_COMP;
|
||||
|
||||
@@ -691,8 +670,6 @@ void vp8_encode_frame(VP8_COMP *cpi);
|
||||
|
||||
void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size);
|
||||
|
||||
unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
|
||||
|
||||
int rd_cost_intra_mb(MACROBLOCKD *x);
|
||||
|
||||
void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **);
|
||||
|
||||
@@ -685,7 +685,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
#if 0
|
||||
|
||||
// Initial step Search
|
||||
bestsme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, cpi->mb.mvcost, &best_ref_mv1);
|
||||
bestsme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, cpi->mb.mvcost);
|
||||
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
|
||||
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
|
||||
|
||||
@@ -698,7 +698,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
num00--;
|
||||
else
|
||||
{
|
||||
thissme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, x->mvcost, &best_ref_mv1);
|
||||
thissme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, x->mvcost);
|
||||
|
||||
if (thissme < bestsme)
|
||||
{
|
||||
@@ -724,7 +724,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
}
|
||||
else
|
||||
{
|
||||
bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv1); //sadpb < 9
|
||||
bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
|
||||
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
|
||||
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
|
||||
|
||||
@@ -743,7 +743,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
num00--;
|
||||
else
|
||||
{
|
||||
thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv1); //sadpb = 9
|
||||
thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
|
||||
|
||||
if (thissme < bestsme)
|
||||
{
|
||||
|
||||
251
vp8/encoder/preproc.c
Normal file
251
vp8/encoder/preproc.c
Normal file
@@ -0,0 +1,251 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Module Title : preproc.c
|
||||
*
|
||||
* Description : Simple pre-processor.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
****************************************************************************/
|
||||
|
||||
#include "memory.h"
|
||||
#include "preproc7.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Macros
|
||||
****************************************************************************/
|
||||
#define FRAMECOUNT 7
|
||||
/* Round an address/integer up to the next multiple of 32.  The mask is built
 * at the full width of unsigned long (~31UL) so that the upper bits of a
 * 64-bit pointer value are preserved; a literal 0xFFFFFFE0 would zero-extend
 * and clear them.  NOTE(review): where unsigned long is narrower than a
 * pointer the cast itself still truncates, as it did before. */
#define ROUNDUP32(X) ( ( ( (unsigned long) (X) ) + 31 ) & ~31UL )
|
||||
|
||||
/****************************************************************************
|
||||
* Imports
|
||||
****************************************************************************/
|
||||
extern void vp8_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
|
||||
|
||||
/****************************************************************************
|
||||
* Exported Global Variables
|
||||
****************************************************************************/
|
||||
/* Currently selected temporal-filter routine.  init_pre_proc7() assigns it
 * to the WMT, MMX or plain C implementation according to the processor
 * flags it queries. */
void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);

/* Alternative implementations of the filter; only declared in this file.
 * Both take the same arguments as temp_filter_c(). */
void temp_filter_mmx(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
void temp_filter_wmt(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : temp_filter_c
|
||||
*
|
||||
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
|
||||
* unsigned char *s : Pointer to source frame.
|
||||
* unsigned char *d : Pointer to destination frame.
|
||||
* int bytes : Number of bytes to filter.
|
||||
* int strength : Strength of filter to apply.
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Performs a closeness-adjusted temporal blur
|
||||
*
|
||||
* SPECIAL NOTES : Destination frame can be same as source frame.
|
||||
*
|
||||
****************************************************************************/
|
||||
void temp_filter_c
|
||||
(
|
||||
pre_proc_instance *ppi,
|
||||
unsigned char *s,
|
||||
unsigned char *d,
|
||||
int bytes,
|
||||
int strength
|
||||
)
|
||||
{
|
||||
int byte = 0;
|
||||
unsigned char *frameptr = ppi->frame_buffer;
|
||||
|
||||
if (ppi->frame == 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
int frame = 0;
|
||||
|
||||
do
|
||||
{
|
||||
*frameptr = s[byte];
|
||||
++frameptr;
|
||||
++frame;
|
||||
}
|
||||
while (frame < FRAMECOUNT);
|
||||
|
||||
d[byte] = s[byte];
|
||||
|
||||
++byte;
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
int modifier;
|
||||
int offset = (ppi->frame % FRAMECOUNT);
|
||||
|
||||
do
|
||||
{
|
||||
int accumulator = 0;
|
||||
int count = 0;
|
||||
int frame = 0;
|
||||
|
||||
frameptr[offset] = s[byte];
|
||||
|
||||
do
|
||||
{
|
||||
int pixel_value = *frameptr;
|
||||
|
||||
modifier = s[byte];
|
||||
modifier -= pixel_value;
|
||||
modifier *= modifier;
|
||||
modifier >>= strength;
|
||||
modifier *= 3;
|
||||
|
||||
if (modifier > 16)
|
||||
modifier = 16;
|
||||
|
||||
modifier = 16 - modifier;
|
||||
|
||||
accumulator += modifier * pixel_value;
|
||||
|
||||
count += modifier;
|
||||
|
||||
frameptr++;
|
||||
|
||||
++frame;
|
||||
}
|
||||
while (frame < FRAMECOUNT);
|
||||
|
||||
accumulator += (count >> 1);
|
||||
accumulator *= ppi->fixed_divide[count];
|
||||
accumulator >>= 16;
|
||||
|
||||
d[byte] = accumulator;
|
||||
|
||||
++byte;
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
|
||||
++ppi->frame;
|
||||
}
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : delete_pre_proc
|
||||
*
|
||||
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Deletes a pre-processing instance.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
/*
 * Release the buffers owned by a pre-processor instance and clear both the
 * raw allocation pointers and the aligned pointers derived from them, so the
 * instance can safely be deleted again or re-initialised.
 */
void delete_pre_proc(pre_proc_instance *ppi)
{
    /* Frame history buffer. */
    if (ppi->frame_buffer_alloc != 0)
    {
        vpx_free(ppi->frame_buffer_alloc);
        ppi->frame_buffer_alloc = 0;
    }

    ppi->frame_buffer = 0;

    /* Reciprocal lookup table. */
    if (ppi->fixed_divide_alloc != 0)
    {
        vpx_free(ppi->fixed_divide_alloc);
        ppi->fixed_divide_alloc = 0;
    }

    ppi->fixed_divide = 0;
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : init_pre_proc7
|
||||
*
|
||||
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
|
||||
* int frame_size : Number of bytes in one frame.
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : int: 1 if successful, 0 if failed.
|
||||
*
|
||||
* FUNCTION : Initializes preprocessor instance.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
/*
 * Prepare a pre-processor instance for frames of frame_size bytes.
 *
 * Selects the temporal-filter implementation from the processor flags,
 * frees whatever the instance currently owns, then allocates
 *   - a history buffer of frame_size * FRAMECOUNT bytes, and
 *   - a 255-entry reciprocal table with fixed_divide[i] = 0x10000 / i
 *     (entry 0 is set to 0; it has no meaningful reciprocal),
 * each over-allocated by 32 bytes so the working pointer can be rounded up
 * with ROUNDUP32.
 *
 * Returns 1 on success.  Returns 0, leaving the instance with no buffers,
 * if frame_size is not positive or an allocation fails.
 *
 * The instance must already hold valid (or null) allocation pointers,
 * because delete_pre_proc() is run on it before anything is allocated.
 *
 * NOTE(review): ppi->frame is not reset here, yet temp_filter_c() only primes
 * the history buffer while ppi->frame == 0 - confirm the caller zeroes it.
 */
int init_pre_proc7(pre_proc_instance *ppi, int frame_size)
{
    int i;
    int mmx_enabled;
    int xmm_enabled;
    int wmt_enabled;

    vp8_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);

    if (wmt_enabled)
        temp_filter = temp_filter_wmt;
    else if (mmx_enabled)
        temp_filter = temp_filter_mmx;
    else
        temp_filter = temp_filter_c;

    /* Drop anything left over from a previous initialisation. */
    delete_pre_proc(ppi);

    /* A zero or negative size cannot describe a frame and would otherwise
     * turn into a bogus allocation size below. */
    if (frame_size <= 0)
        return 0;

    /* sizeof comes first so the whole product is evaluated in size_t rather
     * than overflowing in int for large frames. */
    ppi->frame_buffer_alloc = vpx_malloc(32 + sizeof(unsigned char) * frame_size * FRAMECOUNT);

    if (!ppi->frame_buffer_alloc)
    {
        delete_pre_proc(ppi);
        return 0;
    }

    ppi->frame_buffer = (unsigned char *) ROUNDUP32(ppi->frame_buffer_alloc);

    ppi->fixed_divide_alloc = vpx_malloc(32 + 255 * sizeof(unsigned int));

    if (!ppi->fixed_divide_alloc)
    {
        delete_pre_proc(ppi);
        return 0;
    }

    ppi->fixed_divide = (unsigned int *) ROUNDUP32(ppi->fixed_divide_alloc);

    /* 16.16 fixed-point reciprocals; index 0 is defined but unused. */
    ppi->fixed_divide[0] = 0;

    for (i = 1; i < 255; i++)
        ppi->fixed_divide[i] = 0x10000 / i;

    return 1;
}
|
||||
@@ -16,9 +16,8 @@
|
||||
#include "entropy.h"
|
||||
#include "predictdc.h"
|
||||
|
||||
#define EXACT_QUANT
|
||||
|
||||
#ifdef EXACT_FASTQUANT
|
||||
//#define EXACT_QUANT
|
||||
#ifdef EXACT_QUANT
|
||||
void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
int i, rc, eob;
|
||||
@@ -27,7 +26,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|
||||
short *coeff_ptr = b->coeff;
|
||||
short *zbin_ptr = b->zbin;
|
||||
short *round_ptr = b->round;
|
||||
short *quant_ptr = b->quant_fast;
|
||||
short *quant_ptr = b->quant;
|
||||
short *quant_shift_ptr = b->quant_shift;
|
||||
short *qcoeff_ptr = d->qcoeff;
|
||||
short *dqcoeff_ptr = d->dqcoeff;
|
||||
@@ -65,45 +64,6 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|
||||
d->eob = eob + 1;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Fast quantizer for one block of 16 coefficients (quant_fast table variant).
 *
 * Coefficients are visited in vp8_default_zig_zag1d order.  For each one the
 * magnitude is computed as ((|z| + round) * quant_fast) >> 16, the sign is
 * restored, and the result is written to d->qcoeff together with its
 * dequantized value (result * dequant) in d->dqcoeff.  d->eob is set to one
 * past the scan position of the last non-zero quantized coefficient, or 0
 * when every coefficient quantizes to zero.
 *
 * The unused local 'zbin' from the earlier version has been removed.
 */
void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
{
    int i;
    int eob = -1;
    short *coeff_ptr   = b->coeff;
    short *round_ptr   = b->round;
    short *quant_ptr   = b->quant_fast;
    short *qcoeff_ptr  = d->qcoeff;
    short *dqcoeff_ptr = d->dqcoeff;
    short *dequant_ptr = d->dequant;

    for (i = 0; i < 16; i++)
    {
        int rc = vp8_default_zig_zag1d[i];
        int z  = coeff_ptr[rc];
        int sz = (z >> 31);                 // sign of z
        int x  = (z ^ sz) - sz;             // x = abs(z)
        int y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)

        x = (y ^ sz) - sz;                  // get the sign back
        qcoeff_ptr[rc]  = x;                // write to destination
        dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value

        if (y)
            eob = i;                        // last nonzero coeffs
    }

    d->eob = eob + 1;
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef EXACT_QUANT
|
||||
void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
int i, rc, eob;
|
||||
@@ -218,6 +178,39 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
|
||||
}
|
||||
|
||||
#else
|
||||
/*
 * Fast quantizer for one block of 16 coefficients (b->quant table variant).
 *
 * Coefficients are visited in vp8_default_zig_zag1d order.  For each one the
 * magnitude is computed as ((|z| + round) * quant) >> 16, the sign is
 * restored, and the result is written to d->qcoeff together with its
 * dequantized value (result * dequant) in d->dqcoeff.  d->eob is set to one
 * past the scan position of the last non-zero quantized coefficient, or 0
 * when every coefficient quantizes to zero.
 *
 * The unused local 'zbin' from the earlier version has been removed.
 */
void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
{
    int i;
    int eob = -1;
    short *coeff_ptr   = b->coeff;
    short *round_ptr   = b->round;
    short *quant_ptr   = b->quant;
    short *qcoeff_ptr  = d->qcoeff;
    short *dqcoeff_ptr = d->dqcoeff;
    short *dequant_ptr = d->dequant;

    for (i = 0; i < 16; i++)
    {
        int rc = vp8_default_zig_zag1d[i];
        int z  = coeff_ptr[rc];
        int sz = (z >> 31);                 // sign of z
        int x  = (z ^ sz) - sz;             // x = abs(z)
        int y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)

        x = (y ^ sz) - sz;                  // get the sign back
        qcoeff_ptr[rc]  = x;                // write to destination
        dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value

        if (y)
            eob = i;                        // last nonzero coeffs
    }

    d->eob = eob + 1;
}
|
||||
|
||||
void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
|
||||
@@ -45,48 +45,46 @@ extern int inter_b_modes[10];
|
||||
// Bits Per MB at different Q (Multiplied by 512)
|
||||
#define BPER_MB_NORMBITS 9
|
||||
|
||||
// Work in progress recalibration of baseline rate tables based on
|
||||
// the assumption that bits per mb is inversely proportional to the
|
||||
// quantizer value.
|
||||
const int vp8_bits_per_mb[2][QINDEX_RANGE] =
|
||||
{
|
||||
// Intra case 450000/Qintra
|
||||
// (Updated 19 March 08) Baseline estimate of INTRA-frame Bits Per MB at each Q:
|
||||
{
|
||||
1125000,900000, 750000, 642857, 562500, 500000, 450000, 450000,
|
||||
409090, 375000, 346153, 321428, 300000, 281250, 264705, 264705,
|
||||
250000, 236842, 225000, 225000, 214285, 214285, 204545, 204545,
|
||||
195652, 195652, 187500, 180000, 180000, 173076, 166666, 160714,
|
||||
155172, 150000, 145161, 140625, 136363, 132352, 128571, 125000,
|
||||
121621, 121621, 118421, 115384, 112500, 109756, 107142, 104651,
|
||||
102272, 100000, 97826, 97826, 95744, 93750, 91836, 90000,
|
||||
88235, 86538, 84905, 83333, 81818, 80357, 78947, 77586,
|
||||
76271, 75000, 73770, 72580, 71428, 70312, 69230, 68181,
|
||||
67164, 66176, 65217, 64285, 63380, 62500, 61643, 60810,
|
||||
60000, 59210, 59210, 58441, 57692, 56962, 56250, 55555,
|
||||
54878, 54216, 53571, 52941, 52325, 51724, 51136, 50561,
|
||||
49450, 48387, 47368, 46875, 45918, 45000, 44554, 44117,
|
||||
43269, 42452, 41666, 40909, 40178, 39473, 38793, 38135,
|
||||
36885, 36290, 35714, 35156, 34615, 34090, 33582, 33088,
|
||||
32608, 32142, 31468, 31034, 30405, 29801, 29220, 28662,
|
||||
674781, 606845, 553905, 524293, 500428, 452540, 435379, 414719,
|
||||
390970, 371082, 359416, 341807, 336957, 317263, 303724, 298402,
|
||||
285688, 275237, 268455, 262560, 256038, 248734, 241087, 237615,
|
||||
229247, 225211, 219112, 213920, 211559, 202714, 198482, 193401,
|
||||
187866, 183453, 179212, 175965, 171852, 167235, 163972, 160560,
|
||||
156032, 154349, 151390, 148725, 145708, 142311, 139981, 137700,
|
||||
134084, 131863, 129746, 128498, 126077, 123461, 121290, 117782,
|
||||
114883, 112332, 108410, 105685, 103434, 101192, 98587, 95959,
|
||||
94059, 92017, 89970, 87936, 86142, 84801, 82736, 81106,
|
||||
79668, 78135, 76641, 75103, 73943, 72693, 71401, 70098,
|
||||
69165, 67901, 67170, 65987, 64923, 63534, 62378, 61302,
|
||||
59921, 58941, 57844, 56782, 55960, 54973, 54257, 53454,
|
||||
52230, 50938, 49962, 49190, 48288, 47270, 46738, 46037,
|
||||
45020, 44027, 43216, 42287, 41594, 40702, 40081, 39414,
|
||||
38282, 37627, 36987, 36375, 35808, 35236, 34710, 34162,
|
||||
33659, 33327, 32751, 32384, 31936, 31461, 30982, 30582,
|
||||
},
|
||||
// Inter case 285000/Qinter
|
||||
|
||||
// (Updated 19 March 08) Baseline estimate of INTER-frame Bits Per MB at each Q:
|
||||
{
|
||||
712500, 570000, 475000, 407142, 356250, 316666, 285000, 259090,
|
||||
237500, 219230, 203571, 190000, 178125, 167647, 158333, 150000,
|
||||
142500, 135714, 129545, 123913, 118750, 114000, 109615, 105555,
|
||||
101785, 98275, 95000, 91935, 89062, 86363, 83823, 81428,
|
||||
79166, 77027, 75000, 73076, 71250, 69512, 67857, 66279,
|
||||
64772, 63333, 61956, 60638, 59375, 58163, 57000, 55882,
|
||||
54807, 53773, 52777, 51818, 50892, 50000, 49137, 47500,
|
||||
45967, 44531, 43181, 41911, 40714, 39583, 38513, 37500,
|
||||
36538, 35625, 34756, 33928, 33139, 32386, 31666, 30978,
|
||||
30319, 29687, 29081, 28500, 27941, 27403, 26886, 26388,
|
||||
25909, 25446, 25000, 24568, 23949, 23360, 22800, 22265,
|
||||
21755, 21268, 20802, 20357, 19930, 19520, 19127, 18750,
|
||||
18387, 18037, 17701, 17378, 17065, 16764, 16473, 16101,
|
||||
15745, 15405, 15079, 14766, 14467, 14179, 13902, 13636,
|
||||
13380, 13133, 12895, 12666, 12445, 12179, 11924, 11632,
|
||||
11445, 11220, 11003, 10795, 10594, 10401, 10215, 10035,
|
||||
497401, 426316, 372064, 352732, 335763, 283921, 273848, 253321,
|
||||
233181, 217727, 210030, 196685, 194836, 178396, 167753, 164116,
|
||||
154119, 146929, 142254, 138488, 133591, 127741, 123166, 120226,
|
||||
114188, 111756, 107882, 104749, 102522, 96451, 94424, 90905,
|
||||
87286, 84931, 82111, 80534, 77610, 74700, 73037, 70715,
|
||||
68006, 67235, 65374, 64009, 62134, 60180, 59105, 57691,
|
||||
55509, 54512, 53318, 52693, 51194, 49840, 48944, 46980,
|
||||
45668, 44177, 42348, 40994, 39859, 38889, 37717, 36391,
|
||||
35482, 34622, 33795, 32756, 32002, 31492, 30573, 29737,
|
||||
29152, 28514, 27941, 27356, 26859, 26329, 25874, 25364,
|
||||
24957, 24510, 24290, 23689, 23380, 22845, 22481, 22066,
|
||||
21587, 21219, 20880, 20452, 20260, 19926, 19661, 19334,
|
||||
18915, 18391, 18046, 17833, 17441, 17105, 16888, 16729,
|
||||
16383, 16023, 15706, 15442, 15222, 14938, 14673, 14452,
|
||||
14005, 13807, 13611, 13447, 13223, 13102, 12963, 12801,
|
||||
12627, 12534, 12356, 12228, 12056, 11907, 11746, 11643,
|
||||
}
|
||||
};
|
||||
|
||||
@@ -326,7 +324,6 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
|
||||
cpi->frames_till_gf_update_due = cpi->goldfreq;
|
||||
|
||||
cpi->common.refresh_golden_frame = TRUE;
|
||||
cpi->common.refresh_alt_ref_frame = TRUE;
|
||||
}
|
||||
|
||||
void vp8_calc_auto_iframe_target_size(VP8_COMP *cpi)
|
||||
@@ -1037,7 +1034,9 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
gf_frame_useage = pct_gf_active;
|
||||
|
||||
// Is a fixed manual GF frequency being used
|
||||
if (cpi->auto_gold)
|
||||
if (!cpi->auto_gold)
|
||||
cpi->common.refresh_golden_frame = TRUE;
|
||||
else
|
||||
{
|
||||
// For one pass throw a GF if recent frame intra useage is low or the GF useage is high
|
||||
if ((cpi->pass == 0) && (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5))
|
||||
|
||||
1475
vp8/encoder/rdopt.c
1475
vp8/encoder/rdopt.c
File diff suppressed because it is too large
Load Diff
@@ -126,24 +126,6 @@ void vp8_sad16x16x3_c(
|
||||
sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
/* Calls vp8_sad16x16_c() for eight consecutive reference positions
 * (ref_ptr + 0 .. ref_ptr + 7) against the same source block and stores each
 * result, narrowed to unsigned short, in sad_array[0..7].
 * NOTE(review): 0x7fffffff is presumably a "no limit" value for the callee's
 * last argument - confirm against vp8_sad16x16_c. */
void vp8_sad16x16x8_c(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride,
    unsigned short *sad_array
)
{
    int offset;

    for (offset = 0; offset < 8; offset++)
        sad_array[offset] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + offset, ref_stride, 0x7fffffff);
}
|
||||
|
||||
void vp8_sad16x8x3_c(
|
||||
const unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
@@ -157,24 +139,6 @@ void vp8_sad16x8x3_c(
|
||||
sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
/* Calls vp8_sad16x8_c() for eight consecutive reference positions
 * (ref_ptr + 0 .. ref_ptr + 7) against the same source block and stores each
 * result, narrowed to unsigned short, in sad_array[0..7].
 * NOTE(review): 0x7fffffff is presumably a "no limit" value for the callee's
 * last argument - confirm against vp8_sad16x8_c. */
void vp8_sad16x8x8_c(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride,
    unsigned short *sad_array
)
{
    int offset;

    for (offset = 0; offset < 8; offset++)
        sad_array[offset] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + offset, ref_stride, 0x7fffffff);
}
|
||||
|
||||
void vp8_sad8x8x3_c(
|
||||
const unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
@@ -188,24 +152,6 @@ void vp8_sad8x8x3_c(
|
||||
sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
/* Calls vp8_sad8x8_c() for eight consecutive reference positions
 * (ref_ptr + 0 .. ref_ptr + 7) against the same source block and stores each
 * result, narrowed to unsigned short, in sad_array[0..7].
 * NOTE(review): 0x7fffffff is presumably a "no limit" value for the callee's
 * last argument - confirm against vp8_sad8x8_c. */
void vp8_sad8x8x8_c(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride,
    unsigned short *sad_array
)
{
    int offset;

    for (offset = 0; offset < 8; offset++)
        sad_array[offset] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + offset, ref_stride, 0x7fffffff);
}
|
||||
|
||||
void vp8_sad8x16x3_c(
|
||||
const unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
@@ -219,24 +165,6 @@ void vp8_sad8x16x3_c(
|
||||
sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
/* Calls vp8_sad8x16_c() for eight consecutive reference positions
 * (ref_ptr + 0 .. ref_ptr + 7) against the same source block and stores each
 * result, narrowed to unsigned short, in sad_array[0..7].
 * NOTE(review): 0x7fffffff is presumably a "no limit" value for the callee's
 * last argument - confirm against vp8_sad8x16_c. */
void vp8_sad8x16x8_c(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride,
    unsigned short *sad_array
)
{
    int offset;

    for (offset = 0; offset < 8; offset++)
        sad_array[offset] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + offset, ref_stride, 0x7fffffff);
}
|
||||
|
||||
void vp8_sad4x4x3_c(
|
||||
const unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
@@ -250,24 +178,6 @@ void vp8_sad4x4x3_c(
|
||||
sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
/* Calls vp8_sad4x4_c() for eight consecutive reference positions
 * (ref_ptr + 0 .. ref_ptr + 7) against the same source block and stores each
 * result, narrowed to unsigned short, in sad_array[0..7].
 * NOTE(review): 0x7fffffff is presumably a "no limit" value for the callee's
 * last argument - confirm against vp8_sad4x4_c. */
void vp8_sad4x4x8_c(
    const unsigned char *src_ptr,
    int src_stride,
    const unsigned char *ref_ptr,
    int ref_stride,
    unsigned short *sad_array
)
{
    int offset;

    for (offset = 0; offset < 8; offset++)
        sad_array[offset] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + offset, ref_stride, 0x7fffffff);
}
|
||||
|
||||
void vp8_sad16x16x4d_c(
|
||||
const unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
|
||||
@@ -36,37 +36,30 @@
|
||||
|
||||
#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
|
||||
#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
|
||||
#define USE_FILTER_LUT 0 // use lookup table to improve filter
|
||||
|
||||
#define USE_FILTER_LUT 1
|
||||
#if VP8_TEMPORAL_ALT_REF
|
||||
|
||||
#if USE_FILTER_LUT
|
||||
// for (strength = 0; strength <= 6; strength++) {
|
||||
// for (delta = 0; delta <= 18; delta++) {
|
||||
// float coeff = (3.0 * delta * delta) / pow(2, strength);
|
||||
// printf("%3d", (int)roundf(coeff > 16 ? 0 : 16-coeff));
|
||||
// }
|
||||
// printf("\n");
|
||||
// }
|
||||
static int modifier_lut[7][19] =
|
||||
{
|
||||
// Strength=0
|
||||
{16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=1
|
||||
{16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=2
|
||||
{16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=3
|
||||
{16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=4
|
||||
{16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=5
|
||||
{16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
|
||||
{16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=6
|
||||
{16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
|
||||
{16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
|
||||
};
|
||||
#endif
|
||||
static void vp8_temporal_filter_predictors_mb_c
|
||||
static void build_predictors_mb
|
||||
(
|
||||
MACROBLOCKD *x,
|
||||
unsigned char *y_mb_ptr,
|
||||
@@ -118,7 +111,7 @@ static void vp8_temporal_filter_predictors_mb_c
|
||||
RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
|
||||
}
|
||||
}
|
||||
void vp8_temporal_filter_apply_c
|
||||
static void apply_temporal_filter
|
||||
(
|
||||
unsigned char *frame1,
|
||||
unsigned int stride,
|
||||
@@ -147,14 +140,16 @@ void vp8_temporal_filter_apply_c
|
||||
int pixel_value = *frame2++;
|
||||
|
||||
#if USE_FILTER_LUT
|
||||
// LUT implementation --
|
||||
// improves precision of filter
|
||||
modifier = abs(src_byte-pixel_value);
|
||||
modifier = modifier>18 ? 0 : lut[modifier];
|
||||
#else
|
||||
modifier = src_byte - pixel_value;
|
||||
modifier = src_byte;
|
||||
modifier -= pixel_value;
|
||||
modifier *= modifier;
|
||||
modifier *= 3;
|
||||
modifier += 1 << (strength - 1);
|
||||
modifier >>= strength;
|
||||
modifier *= 3;
|
||||
|
||||
if (modifier > 16)
|
||||
modifier = 16;
|
||||
@@ -176,7 +171,7 @@ void vp8_temporal_filter_apply_c
|
||||
#if ALT_REF_MC_ENABLED
|
||||
static int dummy_cost[2*mv_max+1];
|
||||
|
||||
static int vp8_temporal_filter_find_matching_mb_c
|
||||
static int find_matching_mb
|
||||
(
|
||||
VP8_COMP *cpi,
|
||||
YV12_BUFFER_CONFIG *arf_frame,
|
||||
@@ -251,7 +246,7 @@ static int vp8_temporal_filter_find_matching_mb_c
|
||||
step_param,
|
||||
sadpb / 2/*x->errorperbit*/,
|
||||
&num00, &cpi->fn_ptr[BLOCK_16X16],
|
||||
mvsadcost, mvcost, &best_ref_mv1); //sadpb < 9
|
||||
mvsadcost, mvcost); //sadpb < 9
|
||||
|
||||
// Further step/diamond searches as necessary
|
||||
n = 0;
|
||||
@@ -273,7 +268,7 @@ static int vp8_temporal_filter_find_matching_mb_c
|
||||
step_param + n,
|
||||
sadpb / 4/*x->errorperbit*/,
|
||||
&num00, &cpi->fn_ptr[BLOCK_16X16],
|
||||
mvsadcost, mvcost, &best_ref_mv1); //sadpb = 9
|
||||
mvsadcost, mvcost); //sadpb = 9
|
||||
|
||||
if (thissme < bestsme)
|
||||
{
|
||||
@@ -297,7 +292,7 @@ static int vp8_temporal_filter_find_matching_mb_c
|
||||
bestsme = cpi->find_fractional_mv_step(x, b, d,
|
||||
&d->bmi.mv.as_mv, &best_ref_mv1,
|
||||
x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
|
||||
mvcost);
|
||||
cpi->mb.mvcost);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -313,7 +308,7 @@ static int vp8_temporal_filter_find_matching_mb_c
|
||||
}
|
||||
#endif
|
||||
|
||||
static void vp8_temporal_filter_iterate_c
|
||||
static void vp8cx_temp_blur1_c
|
||||
(
|
||||
VP8_COMP *cpi,
|
||||
int frame_count,
|
||||
@@ -417,12 +412,11 @@ static void vp8_temporal_filter_iterate_c
|
||||
#define THRESH_HIGH 20000
|
||||
|
||||
// Correlation has been lost try MC
|
||||
err = vp8_temporal_filter_find_matching_mb_c
|
||||
(cpi,
|
||||
cpi->frames[alt_ref_index],
|
||||
cpi->frames[frame],
|
||||
mb_y_offset,
|
||||
THRESH_LOW);
|
||||
err = find_matching_mb ( cpi,
|
||||
cpi->frames[alt_ref_index],
|
||||
cpi->frames[frame],
|
||||
mb_y_offset,
|
||||
THRESH_LOW );
|
||||
|
||||
if (filter_weight[frame] < 2)
|
||||
{
|
||||
@@ -435,46 +429,43 @@ static void vp8_temporal_filter_iterate_c
|
||||
if (filter_weight[frame] != 0)
|
||||
{
|
||||
// Construct the predictors
|
||||
vp8_temporal_filter_predictors_mb_c
|
||||
(mbd,
|
||||
cpi->frames[frame]->y_buffer + mb_y_offset,
|
||||
cpi->frames[frame]->u_buffer + mb_uv_offset,
|
||||
cpi->frames[frame]->v_buffer + mb_uv_offset,
|
||||
cpi->frames[frame]->y_stride,
|
||||
mbd->block[0].bmi.mv.as_mv.row,
|
||||
mbd->block[0].bmi.mv.as_mv.col,
|
||||
predictor);
|
||||
build_predictors_mb (
|
||||
mbd,
|
||||
cpi->frames[frame]->y_buffer + mb_y_offset,
|
||||
cpi->frames[frame]->u_buffer + mb_uv_offset,
|
||||
cpi->frames[frame]->v_buffer + mb_uv_offset,
|
||||
cpi->frames[frame]->y_stride,
|
||||
mbd->block[0].bmi.mv.as_mv.row,
|
||||
mbd->block[0].bmi.mv.as_mv.col,
|
||||
predictor );
|
||||
|
||||
// Apply the filter (YUV)
|
||||
TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
|
||||
(f->y_buffer + mb_y_offset,
|
||||
f->y_stride,
|
||||
predictor,
|
||||
16,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator,
|
||||
count);
|
||||
apply_temporal_filter ( f->y_buffer + mb_y_offset,
|
||||
f->y_stride,
|
||||
predictor,
|
||||
16,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator,
|
||||
count );
|
||||
|
||||
TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
|
||||
(f->u_buffer + mb_uv_offset,
|
||||
f->uv_stride,
|
||||
predictor + 256,
|
||||
8,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator + 256,
|
||||
count + 256);
|
||||
apply_temporal_filter ( f->u_buffer + mb_uv_offset,
|
||||
f->uv_stride,
|
||||
predictor + 256,
|
||||
8,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator + 256,
|
||||
count + 256 );
|
||||
|
||||
TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
|
||||
(f->v_buffer + mb_uv_offset,
|
||||
f->uv_stride,
|
||||
predictor + 320,
|
||||
8,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator + 320,
|
||||
count + 320);
|
||||
apply_temporal_filter ( f->v_buffer + mb_uv_offset,
|
||||
f->uv_stride,
|
||||
predictor + 320,
|
||||
8,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator + 320,
|
||||
count + 320 );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -543,7 +534,7 @@ static void vp8_temporal_filter_iterate_c
|
||||
mbd->pre.v_buffer = v_buffer;
|
||||
}
|
||||
|
||||
void vp8_temporal_filter_prepare_c
|
||||
void vp8cx_temp_filter_c
|
||||
(
|
||||
VP8_COMP *cpi
|
||||
)
|
||||
@@ -651,7 +642,7 @@ void vp8_temporal_filter_prepare_c
|
||||
= &cpi->src_buffer[which_buffer].source_buffer;
|
||||
}
|
||||
|
||||
vp8_temporal_filter_iterate_c (
|
||||
vp8cx_temp_blur1_c (
|
||||
cpi,
|
||||
frames_to_blur,
|
||||
frames_to_blur_backward,
|
||||
|
||||
@@ -12,33 +12,8 @@
|
||||
#ifndef __INC_VP8_TEMPORAL_FILTER_H
|
||||
#define __INC_VP8_TEMPORAL_FILTER_H
|
||||
|
||||
#define prototype_apply(sym)\
|
||||
void (sym) \
|
||||
( \
|
||||
unsigned char *frame1, \
|
||||
unsigned int stride, \
|
||||
unsigned char *frame2, \
|
||||
unsigned int block_size, \
|
||||
int strength, \
|
||||
int filter_weight, \
|
||||
unsigned int *accumulator, \
|
||||
unsigned int *count \
|
||||
)
|
||||
#include "onyx_int.h"
|
||||
|
||||
#ifndef vp8_temporal_filter_apply
|
||||
#define vp8_temporal_filter_apply vp8_temporal_filter_apply_c
|
||||
#endif
|
||||
extern prototype_apply(vp8_temporal_filter_apply);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
prototype_apply(*apply);
|
||||
} vp8_temporal_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
#define TEMPORAL_INVOKE(ctx,fn) (ctx)->fn
|
||||
#else
|
||||
#define TEMPORAL_INVOKE(ctx,fn) vp8_temporal_filter_##fn
|
||||
#endif
|
||||
void vp8cx_temp_filter_c(VP8_COMP *cpi);
|
||||
|
||||
#endif // __INC_VP8_TEMPORAL_FILTER_H
|
||||
|
||||
@@ -132,6 +132,8 @@ static void tokenize2nd_order_b
|
||||
t->Token = x;
|
||||
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
|
||||
|
||||
t->section = frametype * BLOCK_TYPES * 2 + 2 * type + (c == 0);
|
||||
|
||||
t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0));
|
||||
|
||||
++cpi->coef_counts [type] [band] [pt] [x];
|
||||
@@ -183,6 +185,7 @@ static void tokenize1st_order_b
|
||||
t->Token = x;
|
||||
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
|
||||
|
||||
t->section = frametype * BLOCK_TYPES * 2 + 2 * type + (c == 0);
|
||||
t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0));
|
||||
|
||||
++cpi->coef_counts [type] [band] [pt] [x];
|
||||
@@ -431,6 +434,7 @@ static __inline void stuff2nd_order_b
|
||||
|
||||
t->Token = DCT_EOB_TOKEN;
|
||||
t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];
|
||||
t->section = 11;
|
||||
t->skip_eob_node = 0;
|
||||
++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN];
|
||||
++t;
|
||||
@@ -461,6 +465,7 @@ static __inline void stuff1st_order_b
|
||||
|
||||
t->Token = DCT_EOB_TOKEN;
|
||||
t->context_tree = cpi->common.fc.coef_probs [0] [1] [pt];
|
||||
t->section = 8;
|
||||
t->skip_eob_node = 0;
|
||||
++cpi->coef_counts [0] [1] [pt] [DCT_EOB_TOKEN];
|
||||
++t;
|
||||
@@ -490,6 +495,7 @@ void stuff1st_order_buv
|
||||
|
||||
t->Token = DCT_EOB_TOKEN;
|
||||
t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];
|
||||
t->section = 13;
|
||||
t->skip_eob_node = 0;
|
||||
++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN];
|
||||
++t;
|
||||
|
||||
@@ -25,10 +25,11 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int Token;
|
||||
int Extra;
|
||||
const vp8_prob *context_tree;
|
||||
short Extra;
|
||||
unsigned char Token;
|
||||
unsigned char skip_eob_node;
|
||||
int skip_eob_node;
|
||||
int section;
|
||||
} TOKENEXTRA;
|
||||
|
||||
int rd_cost_mby(MACROBLOCKD *);
|
||||
|
||||
@@ -32,16 +32,6 @@
|
||||
unsigned int *sad_array\
|
||||
)
|
||||
|
||||
#define prototype_sad_multi_same_address_1(sym)\
|
||||
void (sym)\
|
||||
(\
|
||||
const unsigned char *src_ptr, \
|
||||
int source_stride, \
|
||||
const unsigned char *ref_ptr, \
|
||||
int ref_stride, \
|
||||
unsigned short *sad_array\
|
||||
)
|
||||
|
||||
#define prototype_sad_multi_dif_address(sym)\
|
||||
void (sym)\
|
||||
(\
|
||||
@@ -148,31 +138,6 @@ extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3);
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3);
|
||||
|
||||
#ifndef vp8_variance_sad16x16x8
|
||||
#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address_1(vp8_variance_sad16x16x8);
|
||||
|
||||
#ifndef vp8_variance_sad16x8x8
|
||||
#define vp8_variance_sad16x8x8 vp8_sad16x8x8_c
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address_1(vp8_variance_sad16x8x8);
|
||||
|
||||
#ifndef vp8_variance_sad8x8x8
|
||||
#define vp8_variance_sad8x8x8 vp8_sad8x8x8_c
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address_1(vp8_variance_sad8x8x8);
|
||||
|
||||
#ifndef vp8_variance_sad8x16x8
|
||||
#define vp8_variance_sad8x16x8 vp8_sad8x16x8_c
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address_1(vp8_variance_sad8x16x8);
|
||||
|
||||
#ifndef vp8_variance_sad4x4x8
|
||||
#define vp8_variance_sad4x4x8 vp8_sad4x4x8_c
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8);
|
||||
|
||||
//-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|
||||
#ifndef vp8_variance_sad16x16x4d
|
||||
@@ -309,7 +274,6 @@ extern prototype_sad(vp8_variance_get4x4sse_cs);
|
||||
|
||||
typedef prototype_sad(*vp8_sad_fn_t);
|
||||
typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
|
||||
typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
|
||||
typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t);
|
||||
typedef prototype_variance(*vp8_variance_fn_t);
|
||||
typedef prototype_variance2(*vp8_variance2_fn_t);
|
||||
@@ -353,12 +317,6 @@ typedef struct
|
||||
vp8_sad_multi_fn_t sad8x8x3;
|
||||
vp8_sad_multi_fn_t sad4x4x3;
|
||||
|
||||
vp8_sad_multi1_fn_t sad16x16x8;
|
||||
vp8_sad_multi1_fn_t sad16x8x8;
|
||||
vp8_sad_multi1_fn_t sad8x16x8;
|
||||
vp8_sad_multi1_fn_t sad8x8x8;
|
||||
vp8_sad_multi1_fn_t sad4x4x8;
|
||||
|
||||
vp8_sad_multi_d_fn_t sad16x16x4d;
|
||||
vp8_sad_multi_d_fn_t sad16x8x4d;
|
||||
vp8_sad_multi_d_fn_t sad8x16x4d;
|
||||
@@ -376,7 +334,6 @@ typedef struct
|
||||
vp8_variance_fn_t svf_halfpix_v;
|
||||
vp8_variance_fn_t svf_halfpix_hv;
|
||||
vp8_sad_multi_fn_t sdx3f;
|
||||
vp8_sad_multi1_fn_t sdx8f;
|
||||
vp8_sad_multi_d_fn_t sdx4df;
|
||||
} vp8_variance_fn_ptr_t;
|
||||
|
||||
|
||||
@@ -11,231 +11,511 @@
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
section .text
|
||||
global sym(vp8_short_fdct4x4_mmx)
|
||||
global sym(vp8_short_fdct8x4_wmt)
|
||||
|
||||
|
||||
%define DCTCONSTANTSBITS (16)
|
||||
%define DCTROUNDINGVALUE (1<< (DCTCONSTANTSBITS-1))
|
||||
%define x_c1 (60547) ; cos(pi /8) * (1<<15)
|
||||
%define x_c2 (46341) ; cos(pi*2/8) * (1<<15)
|
||||
%define x_c3 (25080) ; cos(pi*3/8) * (1<<15)
|
||||
|
||||
|
||||
;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_fdct4x4_mmx)
|
||||
sym(vp8_short_fdct4x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
mov rsi, arg(0) ;input
|
||||
mov rdi, arg(1) ;output
|
||||
|
||||
mov rsi, arg(0) ; input
|
||||
mov rdi, arg(1) ; output
|
||||
lea rdx, [GLOBAL(dct_const_mmx)]
|
||||
movsxd rax, dword ptr arg(2) ;pitch
|
||||
|
||||
movsxd rax, dword ptr arg(2) ;pitch
|
||||
|
||||
lea rcx, [rsi + rax*2]
|
||||
lea rcx, [rsi + rax*2]
|
||||
; read the input data
|
||||
movq mm0, [rsi]
|
||||
movq mm1, [rsi + rax]
|
||||
movq mm0, [rsi]
|
||||
movq mm1, [rsi + rax ]
|
||||
|
||||
movq mm2, [rcx]
|
||||
movq mm4, [rcx + rax]
|
||||
movq mm2, [rcx]
|
||||
movq mm3, [rcx + rax]
|
||||
; get the constants
|
||||
;shift to left by 1 for prescision
|
||||
psllw mm0, 3
|
||||
psllw mm1, 3
|
||||
|
||||
; transpose for the first stage
|
||||
movq mm3, mm0 ; 00 01 02 03
|
||||
movq mm5, mm2 ; 20 21 22 23
|
||||
psllw mm2, 3
|
||||
psllw mm3, 3
|
||||
|
||||
punpcklwd mm0, mm1 ; 00 10 01 11
|
||||
punpckhwd mm3, mm1 ; 02 12 03 13
|
||||
; transpose for the second stage
|
||||
movq mm4, mm0 ; 00 01 02 03
|
||||
movq mm5, mm2 ; 10 11 12 03
|
||||
|
||||
punpcklwd mm2, mm4 ; 20 30 21 31
|
||||
punpckhwd mm5, mm4 ; 22 32 23 33
|
||||
punpcklwd mm0, mm1 ; 00 10 01 11
|
||||
punpckhwd mm4, mm1 ; 02 12 03 13
|
||||
|
||||
movq mm1, mm0 ; 00 10 01 11
|
||||
punpckldq mm0, mm2 ; 00 10 20 30
|
||||
punpcklwd mm2, mm3 ; 20 30 21 31
|
||||
punpckhwd mm5, mm3 ; 22 32 23 33
|
||||
|
||||
punpckhdq mm1, mm2 ; 01 11 21 31
|
||||
|
||||
movq mm2, mm3 ; 02 12 03 13
|
||||
punpckldq mm2, mm5 ; 02 12 22 32
|
||||
movq mm1, mm0 ; 00 10 01 11
|
||||
punpckldq mm0, mm2 ; 00 10 20 30
|
||||
|
||||
punpckhdq mm3, mm5 ; 03 13 23 33
|
||||
punpckhdq mm1, mm2 ; 01 11 21 31
|
||||
|
||||
movq mm2, mm4 ; 02 12 03 13
|
||||
punpckldq mm2, mm5 ; 02 12 22 32
|
||||
|
||||
punpckhdq mm4, mm5 ; 03 13 23 33
|
||||
movq mm3, mm4
|
||||
|
||||
; mm0 0
|
||||
; mm1 1
|
||||
; mm2 2
|
||||
; mm3 3
|
||||
|
||||
; first stage
|
||||
movq mm5, mm0
|
||||
movq mm4, mm1
|
||||
movq mm5, mm0
|
||||
movq mm4, mm1
|
||||
|
||||
paddw mm0, mm3 ; a1 = 0 + 3
|
||||
paddw mm1, mm2 ; b1 = 1 + 2
|
||||
paddw mm0, mm3 ; a = 0 + 3
|
||||
paddw mm1, mm2 ; b = 1 + 2
|
||||
|
||||
psubw mm4, mm2 ; c1 = 1 - 2
|
||||
psubw mm5, mm3 ; d1 = 0 - 3
|
||||
psubw mm4, mm2 ; c = 1 - 2
|
||||
psubw mm5, mm3 ; d = 0 - 3
|
||||
|
||||
psllw mm5, 3
|
||||
psllw mm4, 3
|
||||
|
||||
psllw mm0, 3
|
||||
psllw mm1, 3
|
||||
|
||||
; output 0 and 2
|
||||
movq mm2, mm0 ; a1
|
||||
movq mm6, [rdx + 16] ; c2
|
||||
movq mm2, mm0 ; a
|
||||
|
||||
paddw mm0, mm1 ; op[0] = a1 + b1
|
||||
psubw mm2, mm1 ; op[2] = a1 - b1
|
||||
paddw mm0, mm1 ; a + b
|
||||
psubw mm2, mm1 ; a - b
|
||||
|
||||
movq mm1, mm0 ; a + b
|
||||
pmulhw mm0, mm6 ; 00 01 02 03
|
||||
|
||||
paddw mm0, mm1 ; output 00 01 02 03
|
||||
pmulhw mm6, mm2 ; 20 21 22 23
|
||||
|
||||
paddw mm2, mm6 ; output 20 21 22 23
|
||||
|
||||
; output 1 and 3
|
||||
; interleave c1, d1
|
||||
movq mm1, mm5 ; d1
|
||||
punpcklwd mm1, mm4 ; c1 d1
|
||||
punpckhwd mm5, mm4 ; c1 d1
|
||||
movq mm6, [rdx + 8] ; c1
|
||||
movq mm7, [rdx + 24] ; c3
|
||||
|
||||
movq mm3, mm1
|
||||
movq mm4, mm5
|
||||
movq mm1, mm4 ; c
|
||||
movq mm3, mm5 ; d
|
||||
|
||||
pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
pmulhw mm1, mm7 ; c * c3
|
||||
pmulhw mm3, mm6 ; d * c1
|
||||
|
||||
pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
paddw mm3, mm5 ; d * c1 rounded
|
||||
paddw mm1, mm3 ; output 10 11 12 13
|
||||
|
||||
paddd mm1, MMWORD PTR[GLOBAL(_14500)]
|
||||
paddd mm4, MMWORD PTR[GLOBAL(_14500)]
|
||||
paddd mm3, MMWORD PTR[GLOBAL(_7500)]
|
||||
paddd mm5, MMWORD PTR[GLOBAL(_7500)]
|
||||
movq mm3, mm4 ; c
|
||||
pmulhw mm5, mm7 ; d * c3
|
||||
|
||||
psrad mm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
|
||||
psrad mm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
|
||||
psrad mm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
|
||||
psrad mm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
|
||||
pmulhw mm4, mm6 ; c * c1
|
||||
paddw mm3, mm4 ; round c* c1
|
||||
|
||||
psubw mm5, mm3 ; output 30 31 32 33
|
||||
movq mm3, mm5
|
||||
|
||||
packssdw mm1, mm4 ; op[1]
|
||||
packssdw mm3, mm5 ; op[3]
|
||||
|
||||
; done with vertical
|
||||
; transpose for the second stage
|
||||
movq mm4, mm0 ; 00 10 20 30
|
||||
movq mm5, mm2 ; 02 12 22 32
|
||||
movq mm4, mm0 ; 00 01 02 03
|
||||
movq mm5, mm2 ; 10 11 12 03
|
||||
|
||||
punpcklwd mm0, mm1 ; 00 01 10 11
|
||||
punpckhwd mm4, mm1 ; 20 21 30 31
|
||||
punpcklwd mm0, mm1 ; 00 10 01 11
|
||||
punpckhwd mm4, mm1 ; 02 12 03 13
|
||||
|
||||
punpcklwd mm2, mm3 ; 02 03 12 13
|
||||
punpckhwd mm5, mm3 ; 22 23 32 33
|
||||
punpcklwd mm2, mm3 ; 20 30 21 31
|
||||
punpckhwd mm5, mm3 ; 22 32 23 33
|
||||
|
||||
movq mm1, mm0 ; 00 01 10 11
|
||||
punpckldq mm0, mm2 ; 00 01 02 03
|
||||
|
||||
punpckhdq mm1, mm2 ; 01 22 12 13
|
||||
movq mm1, mm0 ; 00 10 01 11
|
||||
punpckldq mm0, mm2 ; 00 10 20 30
|
||||
|
||||
movq mm2, mm4 ; 20 31 30 31
|
||||
punpckldq mm2, mm5 ; 20 21 22 23
|
||||
punpckhdq mm1, mm2 ; 01 11 21 31
|
||||
|
||||
punpckhdq mm4, mm5 ; 30 31 32 33
|
||||
movq mm2, mm4 ; 02 12 03 13
|
||||
punpckldq mm2, mm5 ; 02 12 22 32
|
||||
|
||||
; mm0 0
|
||||
; mm1 1
|
||||
; mm2 2
|
||||
; mm3 4
|
||||
punpckhdq mm4, mm5 ; 03 13 23 33
|
||||
movq mm3, mm4
|
||||
|
||||
movq mm5, mm0
|
||||
movq mm3, mm1
|
||||
|
||||
paddw mm0, mm4 ; a1 = 0 + 3
|
||||
paddw mm1, mm2 ; b1 = 1 + 2
|
||||
; first stage
|
||||
movq mm5, mm0
|
||||
movq mm4, mm1
|
||||
|
||||
psubw mm3, mm2 ; c1 = 1 - 2
|
||||
psubw mm5, mm4 ; d1 = 0 - 3
|
||||
paddw mm0, mm3 ; a = 0 + 3
|
||||
paddw mm1, mm2 ; b = 1 + 2
|
||||
|
||||
pxor mm6, mm6 ; zero out for compare
|
||||
psubw mm4, mm2 ; c = 1 - 2
|
||||
psubw mm5, mm3 ; d = 0 - 3
|
||||
|
||||
pcmpeqw mm6, mm5 ; d1 != 0
|
||||
|
||||
pandn mm6, MMWORD PTR[GLOBAL(_cmp_mask)] ; clear upper,
|
||||
; and keep bit 0 of lower
|
||||
|
||||
; output 0 and 2
|
||||
movq mm2, mm0 ; a1
|
||||
movq mm6, [rdx + 16] ; c2
|
||||
movq mm2, mm0 ; a
|
||||
paddw mm0, mm1 ; a + b
|
||||
|
||||
paddw mm0, mm1 ; a1 + b1
|
||||
psubw mm2, mm1 ; a1 - b1
|
||||
psubw mm2, mm1 ; a - b
|
||||
|
||||
paddw mm0, MMWORD PTR[GLOBAL(_7w)]
|
||||
paddw mm2, MMWORD PTR[GLOBAL(_7w)]
|
||||
movq mm1, mm0 ; a + b
|
||||
pmulhw mm0, mm6 ; 00 01 02 03
|
||||
|
||||
psraw mm0, 4 ; op[0] = (a1 + b1 + 7)>>4
|
||||
psraw mm2, 4 ; op[8] = (a1 - b1 + 7)>>4
|
||||
paddw mm0, mm1 ; output 00 01 02 03
|
||||
pmulhw mm6, mm2 ; 20 21 22 23
|
||||
|
||||
paddw mm2, mm6 ; output 20 21 22 23
|
||||
|
||||
movq MMWORD PTR[rdi + 0 ], mm0
|
||||
movq MMWORD PTR[rdi + 16], mm2
|
||||
|
||||
; output 1 and 3
|
||||
; interleave c1, d1
|
||||
movq mm1, mm5 ; d1
|
||||
punpcklwd mm1, mm3 ; c1 d1
|
||||
punpckhwd mm5, mm3 ; c1 d1
|
||||
movq mm6, [rdx + 8] ; c1
|
||||
movq mm7, [rdx + 24] ; c3
|
||||
|
||||
movq mm3, mm1
|
||||
movq mm4, mm5
|
||||
movq mm1, mm4 ; c
|
||||
movq mm3, mm5 ; d
|
||||
|
||||
pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
pmulhw mm1, mm7 ; c * c3
|
||||
pmulhw mm3, mm6 ; d * c1
|
||||
|
||||
pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
paddw mm3, mm5 ; d * c1 rounded
|
||||
paddw mm1, mm3 ; output 10 11 12 13
|
||||
|
||||
paddd mm1, MMWORD PTR[GLOBAL(_12000)]
|
||||
paddd mm4, MMWORD PTR[GLOBAL(_12000)]
|
||||
paddd mm3, MMWORD PTR[GLOBAL(_51000)]
|
||||
paddd mm5, MMWORD PTR[GLOBAL(_51000)]
|
||||
movq mm3, mm4 ; c
|
||||
pmulhw mm5, mm7 ; d * c3
|
||||
|
||||
psrad mm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16
|
||||
psrad mm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16
|
||||
psrad mm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16
|
||||
psrad mm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16
|
||||
pmulhw mm4, mm6 ; c * c1
|
||||
paddw mm3, mm4 ; round c* c1
|
||||
|
||||
packssdw mm1, mm4 ; op[4]
|
||||
packssdw mm3, mm5 ; op[12]
|
||||
psubw mm5, mm3 ; output 30 31 32 33
|
||||
movq mm3, mm5
|
||||
; done with vertical
|
||||
|
||||
paddw mm1, mm6 ; op[4] += (d1!=0)
|
||||
pcmpeqw mm4, mm4
|
||||
pcmpeqw mm5, mm5
|
||||
psrlw mm4, 15
|
||||
psrlw mm5, 15
|
||||
|
||||
movq MMWORD PTR[rdi + 8 ], mm1
|
||||
movq MMWORD PTR[rdi + 24], mm3
|
||||
psllw mm4, 2
|
||||
psllw mm5, 2
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
paddw mm0, mm4
|
||||
paddw mm1, mm5
|
||||
paddw mm2, mm4
|
||||
paddw mm3, mm5
|
||||
|
||||
psraw mm0, 3
|
||||
psraw mm1, 3
|
||||
psraw mm2, 3
|
||||
psraw mm3, 3
|
||||
|
||||
movq [rdi ], mm0
|
||||
movq [rdi+ 8], mm1
|
||||
movq [rdi+16], mm2
|
||||
movq [rdi+24], mm3
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch)
|
||||
sym(vp8_short_fdct8x4_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
mov rsi, arg(0) ;input
|
||||
mov rdi, arg(1) ;output
|
||||
|
||||
lea rdx, [GLOBAL(dct_const_xmm)]
|
||||
movsxd rax, dword ptr arg(2) ;pitch
|
||||
|
||||
lea rcx, [rsi + rax*2]
|
||||
; read the input data
|
||||
movdqa xmm0, [rsi]
|
||||
movdqa xmm2, [rsi + rax]
|
||||
|
||||
movdqa xmm4, [rcx]
|
||||
movdqa xmm3, [rcx + rax]
|
||||
; get the constants
|
||||
;shift to left by 1 for prescision
|
||||
psllw xmm0, 3
|
||||
psllw xmm2, 3
|
||||
|
||||
psllw xmm4, 3
|
||||
psllw xmm3, 3
|
||||
|
||||
; transpose for the second stage
|
||||
movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07
|
||||
movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27
|
||||
|
||||
punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13
|
||||
punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17
|
||||
|
||||
punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33
|
||||
punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37
|
||||
|
||||
movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13
|
||||
punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31
|
||||
|
||||
punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33
|
||||
|
||||
|
||||
movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17
|
||||
punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35
|
||||
|
||||
punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37
|
||||
movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33
|
||||
|
||||
punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37
|
||||
punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36
|
||||
|
||||
movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31
|
||||
punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34
|
||||
|
||||
punpckhqdq xmm1, xmm4 ; 01 11 21 32 05 15 25 35
|
||||
|
||||
; xmm0 0
|
||||
; xmm1 1
|
||||
; xmm2 2
|
||||
; xmm3 3
|
||||
|
||||
; first stage
|
||||
movdqa xmm5, xmm0
|
||||
movdqa xmm4, xmm1
|
||||
|
||||
paddw xmm0, xmm3 ; a = 0 + 3
|
||||
paddw xmm1, xmm2 ; b = 1 + 2
|
||||
|
||||
psubw xmm4, xmm2 ; c = 1 - 2
|
||||
psubw xmm5, xmm3 ; d = 0 - 3
|
||||
|
||||
|
||||
; output 0 and 2
|
||||
movdqa xmm6, [rdx + 32] ; c2
|
||||
movdqa xmm2, xmm0 ; a
|
||||
|
||||
paddw xmm0, xmm1 ; a + b
|
||||
psubw xmm2, xmm1 ; a - b
|
||||
|
||||
movdqa xmm1, xmm0 ; a + b
|
||||
pmulhw xmm0, xmm6 ; 00 01 02 03
|
||||
|
||||
paddw xmm0, xmm1 ; output 00 01 02 03
|
||||
pmulhw xmm6, xmm2 ; 20 21 22 23
|
||||
|
||||
paddw xmm2, xmm6 ; output 20 21 22 23
|
||||
|
||||
; output 1 and 3
|
||||
movdqa xmm6, [rdx + 16] ; c1
|
||||
movdqa xmm7, [rdx + 48] ; c3
|
||||
|
||||
movdqa xmm1, xmm4 ; c
|
||||
movdqa xmm3, xmm5 ; d
|
||||
|
||||
pmulhw xmm1, xmm7 ; c * c3
|
||||
pmulhw xmm3, xmm6 ; d * c1
|
||||
|
||||
paddw xmm3, xmm5 ; d * c1 rounded
|
||||
paddw xmm1, xmm3 ; output 10 11 12 13
|
||||
|
||||
movdqa xmm3, xmm4 ; c
|
||||
pmulhw xmm5, xmm7 ; d * c3
|
||||
|
||||
pmulhw xmm4, xmm6 ; c * c1
|
||||
paddw xmm3, xmm4 ; round c* c1
|
||||
|
||||
psubw xmm5, xmm3 ; output 30 31 32 33
|
||||
movdqa xmm3, xmm5
|
||||
|
||||
|
||||
; done with vertical
|
||||
; transpose for the second stage
|
||||
movdqa xmm4, xmm2 ; 02 12 22 32 06 16 26 36
|
||||
movdqa xmm2, xmm1 ; 01 11 21 31 05 15 25 35
|
||||
|
||||
movdqa xmm1, xmm0 ; 00 10 20 30 04 14 24 34
|
||||
movdqa xmm5, xmm4 ; 02 12 22 32 06 16 26 36
|
||||
|
||||
punpcklwd xmm0, xmm2 ; 00 01 10 11 20 21 30 31
|
||||
punpckhwd xmm1, xmm2 ; 04 05 14 15 24 25 34 35
|
||||
|
||||
punpcklwd xmm4, xmm3 ; 02 03 12 13 22 23 32 33
|
||||
punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37
|
||||
|
||||
movdqa xmm2, xmm0 ; 00 01 10 11 20 21 30 31
|
||||
punpckldq xmm0, xmm4 ; 00 01 02 03 10 11 12 13
|
||||
|
||||
punpckhdq xmm2, xmm4 ; 20 21 22 23 30 31 32 33
|
||||
|
||||
|
||||
movdqa xmm4, xmm1 ; 04 05 14 15 24 25 34 35
|
||||
punpckldq xmm4, xmm5 ; 04 05 06 07 14 15 16 17
|
||||
|
||||
punpckhdq xmm1, xmm5 ; 24 25 26 27 34 35 36 37
|
||||
movdqa xmm3, xmm2 ; 20 21 22 23 30 31 32 33
|
||||
|
||||
punpckhqdq xmm3, xmm1 ; 30 31 32 33 34 35 36 37
|
||||
punpcklqdq xmm2, xmm1 ; 20 21 22 23 24 25 26 27
|
||||
|
||||
movdqa xmm1, xmm0 ; 00 01 02 03 10 11 12 13
|
||||
punpcklqdq xmm0, xmm4 ; 00 01 02 03 04 05 06 07
|
||||
|
||||
punpckhqdq xmm1, xmm4 ; 10 11 12 13 14 15 16 17
|
||||
|
||||
; first stage
|
||||
movdqa xmm5, xmm0
|
||||
movdqa xmm4, xmm1
|
||||
|
||||
paddw xmm0, xmm3 ; a = 0 + 3
|
||||
paddw xmm1, xmm2 ; b = 1 + 2
|
||||
|
||||
psubw xmm4, xmm2 ; c = 1 - 2
|
||||
psubw xmm5, xmm3 ; d = 0 - 3
|
||||
|
||||
|
||||
; output 0 and 2
|
||||
movdqa xmm6, [rdx + 32] ; c2
|
||||
movdqa xmm2, xmm0 ; a
|
||||
|
||||
paddw xmm0, xmm1 ; a + b
|
||||
psubw xmm2, xmm1 ; a - b
|
||||
|
||||
movdqa xmm1, xmm0 ; a + b
|
||||
pmulhw xmm0, xmm6 ; 00 01 02 03
|
||||
|
||||
paddw xmm0, xmm1 ; output 00 01 02 03
|
||||
pmulhw xmm6, xmm2 ; 20 21 22 23
|
||||
|
||||
paddw xmm2, xmm6 ; output 20 21 22 23
|
||||
|
||||
; output 1 and 3
|
||||
movdqa xmm6, [rdx + 16] ; c1
|
||||
movdqa xmm7, [rdx + 48] ; c3
|
||||
|
||||
movdqa xmm1, xmm4 ; c
|
||||
movdqa xmm3, xmm5 ; d
|
||||
|
||||
pmulhw xmm1, xmm7 ; c * c3
|
||||
pmulhw xmm3, xmm6 ; d * c1
|
||||
|
||||
paddw xmm3, xmm5 ; d * c1 rounded
|
||||
paddw xmm1, xmm3 ; output 10 11 12 13
|
||||
|
||||
movdqa xmm3, xmm4 ; c
|
||||
pmulhw xmm5, xmm7 ; d * c3
|
||||
|
||||
pmulhw xmm4, xmm6 ; c * c1
|
||||
paddw xmm3, xmm4 ; round c* c1
|
||||
|
||||
psubw xmm5, xmm3 ; output 30 31 32 33
|
||||
movdqa xmm3, xmm5
|
||||
; done with vertical
|
||||
|
||||
|
||||
pcmpeqw xmm4, xmm4
|
||||
pcmpeqw xmm5, xmm5;
|
||||
psrlw xmm4, 15
|
||||
psrlw xmm5, 15
|
||||
|
||||
psllw xmm4, 2
|
||||
psllw xmm5, 2
|
||||
|
||||
paddw xmm0, xmm4
|
||||
paddw xmm1, xmm5
|
||||
paddw xmm2, xmm4
|
||||
paddw xmm3, xmm5
|
||||
|
||||
psraw xmm0, 3
|
||||
psraw xmm1, 3
|
||||
psraw xmm2, 3
|
||||
psraw xmm3, 3
|
||||
|
||||
movq QWORD PTR[rdi ], xmm0
|
||||
movq QWORD PTR[rdi+ 8], xmm1
|
||||
movq QWORD PTR[rdi+16], xmm2
|
||||
movq QWORD PTR[rdi+24], xmm3
|
||||
|
||||
psrldq xmm0, 8
|
||||
psrldq xmm1, 8
|
||||
psrldq xmm2, 8
|
||||
psrldq xmm3, 8
|
||||
|
||||
movq QWORD PTR[rdi+32], xmm0
|
||||
movq QWORD PTR[rdi+40], xmm1
|
||||
movq QWORD PTR[rdi+48], xmm2
|
||||
movq QWORD PTR[rdi+56], xmm3
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 8
|
||||
_5352_2217:
|
||||
dw 5352
|
||||
dw 2217
|
||||
dw 5352
|
||||
dw 2217
|
||||
align 8
|
||||
_2217_neg5352:
|
||||
dw 2217
|
||||
dw -5352
|
||||
dw 2217
|
||||
dw -5352
|
||||
align 8
|
||||
_cmp_mask:
|
||||
times 4 dw 1
|
||||
align 8
|
||||
_7w:
|
||||
times 4 dw 7
|
||||
align 8
|
||||
_14500:
|
||||
times 2 dd 14500
|
||||
align 8
|
||||
_7500:
|
||||
times 2 dd 7500
|
||||
align 8
|
||||
_12000:
|
||||
times 2 dd 12000
|
||||
align 8
|
||||
_51000:
|
||||
times 2 dd 51000
|
||||
;static const unsigned int dct1st_stage_rounding_mmx[2] =
|
||||
align 16
|
||||
dct1st_stage_rounding_mmx:
|
||||
times 2 dd 8192
|
||||
|
||||
|
||||
;static const unsigned int dct2nd_stage_rounding_mmx[2] =
|
||||
align 16
|
||||
dct2nd_stage_rounding_mmx:
|
||||
times 2 dd 32768
|
||||
|
||||
|
||||
;static const short dct_matrix[4][4]=
|
||||
align 16
|
||||
dct_matrix:
|
||||
times 4 dw 23170
|
||||
|
||||
dw 30274
|
||||
dw 12540
|
||||
dw -12540
|
||||
dw -30274
|
||||
|
||||
dw 23170
|
||||
times 2 dw -23170
|
||||
dw 23170
|
||||
|
||||
dw 12540
|
||||
dw -30274
|
||||
dw 30274
|
||||
dw -12540
|
||||
|
||||
|
||||
;static const unsigned short dct_const_mmx[4 * 4]=
|
||||
align 16
|
||||
dct_const_mmx:
|
||||
times 4 dw 0
|
||||
times 4 dw 60547
|
||||
times 4 dw 46341
|
||||
times 4 dw 25080
|
||||
|
||||
|
||||
;static const unsigned short dct_const_xmm[8 * 4]=
|
||||
align 16
|
||||
dct_const_xmm:
|
||||
times 8 dw 0
|
||||
times 8 dw 60547
|
||||
times 8 dw 46341
|
||||
times 8 dw 25080
|
||||
|
||||
@@ -11,68 +11,32 @@
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%macro STACK_FRAME_CREATE 0
|
||||
%if ABI_IS_32BIT
|
||||
%define input rsi
|
||||
%define output rdi
|
||||
%define pitch rax
|
||||
;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_fdct4x4_sse2)
|
||||
sym(vp8_short_fdct4x4_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
;; SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0)
|
||||
mov rdi, arg(1)
|
||||
movsxd rax, DWORD PTR arg(2)
|
||||
lea rdi, [rsi + rax*2]
|
||||
|
||||
movsxd rax, dword ptr arg(2)
|
||||
lea rcx, [rsi + rax*2]
|
||||
%else
|
||||
%ifidn __OUTPUT_FORMAT__,x64
|
||||
%define input rcx
|
||||
%define output rdx
|
||||
%define pitch r8
|
||||
%else
|
||||
%define input rdi
|
||||
%define output rsi
|
||||
%define pitch rdx
|
||||
%endif
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%macro STACK_FRAME_DESTROY 0
|
||||
%define input
|
||||
%define output
|
||||
%define pitch
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
pop rbp
|
||||
%else
|
||||
%ifidn __OUTPUT_FORMAT__,x64
|
||||
%endif
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_fdct4x4_sse2)
|
||||
sym(vp8_short_fdct4x4_sse2):
|
||||
|
||||
STACK_FRAME_CREATE
|
||||
|
||||
movq xmm0, MMWORD PTR[input ] ;03 02 01 00
|
||||
movq xmm2, MMWORD PTR[input+ pitch] ;13 12 11 10
|
||||
lea input, [input+2*pitch]
|
||||
movq xmm1, MMWORD PTR[input ] ;23 22 21 20
|
||||
movq xmm3, MMWORD PTR[input+ pitch] ;33 32 31 30
|
||||
movq xmm0, MMWORD PTR[rsi ] ;03 02 01 00
|
||||
movq xmm2, MMWORD PTR[rsi + rax] ;13 12 11 10
|
||||
movq xmm1, MMWORD PTR[rsi + rax*2] ;23 22 21 20
|
||||
movq xmm3, MMWORD PTR[rdi + rax] ;33 32 31 30
|
||||
|
||||
punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00
|
||||
punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20
|
||||
|
||||
mov rdi, arg(1)
|
||||
|
||||
movdqa xmm2, xmm0
|
||||
punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00
|
||||
punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10
|
||||
@@ -87,7 +51,6 @@ sym(vp8_short_fdct4x4_sse2):
|
||||
psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1
|
||||
psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3
|
||||
psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3
|
||||
|
||||
movdqa xmm1, xmm0
|
||||
pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
|
||||
pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
|
||||
@@ -158,216 +121,17 @@ sym(vp8_short_fdct4x4_sse2):
|
||||
punpcklqdq xmm0, xmm3 ;op[4] op[0]
|
||||
punpckhqdq xmm1, xmm3 ;op[12] op[8]
|
||||
|
||||
movdqa XMMWORD PTR[output + 0], xmm0
|
||||
movdqa XMMWORD PTR[output + 16], xmm1
|
||||
movdqa XMMWORD PTR[rdi + 0], xmm0
|
||||
movdqa XMMWORD PTR[rdi + 16], xmm1
|
||||
|
||||
STACK_FRAME_DESTROY
|
||||
|
||||
;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_fdct8x4_sse2)
|
||||
sym(vp8_short_fdct8x4_sse2):
|
||||
|
||||
STACK_FRAME_CREATE
|
||||
|
||||
; read the input data
|
||||
movdqa xmm0, [input ]
|
||||
movdqa xmm2, [input+ pitch]
|
||||
lea input, [input+2*pitch]
|
||||
movdqa xmm4, [input ]
|
||||
movdqa xmm3, [input+ pitch]
|
||||
|
||||
; transpose for the first stage
|
||||
movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07
|
||||
movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27
|
||||
|
||||
punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13
|
||||
punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17
|
||||
|
||||
punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33
|
||||
punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37
|
||||
|
||||
movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13
|
||||
punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31
|
||||
|
||||
punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33
|
||||
|
||||
movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17
|
||||
punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35
|
||||
|
||||
punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37
|
||||
movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33
|
||||
|
||||
punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37
|
||||
punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36
|
||||
|
||||
movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31
|
||||
punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34
|
||||
|
||||
punpckhqdq xmm1, xmm4 ; 01 11 21 32 05 15 25 35
|
||||
|
||||
; xmm0 0
|
||||
; xmm1 1
|
||||
; xmm2 2
|
||||
; xmm3 3
|
||||
|
||||
; first stage
|
||||
movdqa xmm5, xmm0
|
||||
movdqa xmm4, xmm1
|
||||
|
||||
paddw xmm0, xmm3 ; a1 = 0 + 3
|
||||
paddw xmm1, xmm2 ; b1 = 1 + 2
|
||||
|
||||
psubw xmm4, xmm2 ; c1 = 1 - 2
|
||||
psubw xmm5, xmm3 ; d1 = 0 - 3
|
||||
|
||||
psllw xmm5, 3
|
||||
psllw xmm4, 3
|
||||
|
||||
psllw xmm0, 3
|
||||
psllw xmm1, 3
|
||||
|
||||
; output 0 and 2
|
||||
movdqa xmm2, xmm0 ; a1
|
||||
|
||||
paddw xmm0, xmm1 ; op[0] = a1 + b1
|
||||
psubw xmm2, xmm1 ; op[2] = a1 - b1
|
||||
|
||||
; output 1 and 3
|
||||
; interleave c1, d1
|
||||
movdqa xmm1, xmm5 ; d1
|
||||
punpcklwd xmm1, xmm4 ; c1 d1
|
||||
punpckhwd xmm5, xmm4 ; c1 d1
|
||||
|
||||
movdqa xmm3, xmm1
|
||||
movdqa xmm4, xmm5
|
||||
|
||||
pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
|
||||
pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
|
||||
paddd xmm1, XMMWORD PTR[GLOBAL(_14500)]
|
||||
paddd xmm4, XMMWORD PTR[GLOBAL(_14500)]
|
||||
paddd xmm3, XMMWORD PTR[GLOBAL(_7500)]
|
||||
paddd xmm5, XMMWORD PTR[GLOBAL(_7500)]
|
||||
|
||||
psrad xmm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
|
||||
psrad xmm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
|
||||
psrad xmm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
|
||||
psrad xmm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
|
||||
|
||||
packssdw xmm1, xmm4 ; op[1]
|
||||
packssdw xmm3, xmm5 ; op[3]
|
||||
|
||||
; done with vertical
|
||||
; transpose for the second stage
|
||||
movdqa xmm4, xmm0 ; 00 10 20 30 04 14 24 34
|
||||
movdqa xmm5, xmm2 ; 02 12 22 32 06 16 26 36
|
||||
|
||||
punpcklwd xmm0, xmm1 ; 00 01 10 11 20 21 30 31
|
||||
punpckhwd xmm4, xmm1 ; 04 05 14 15 24 25 34 35
|
||||
|
||||
punpcklwd xmm2, xmm3 ; 02 03 12 13 22 23 32 33
|
||||
punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37
|
||||
|
||||
movdqa xmm1, xmm0 ; 00 01 10 11 20 21 30 31
|
||||
punpckldq xmm0, xmm2 ; 00 01 02 03 10 11 12 13
|
||||
|
||||
punpckhdq xmm1, xmm2 ; 20 21 22 23 30 31 32 33
|
||||
|
||||
movdqa xmm2, xmm4 ; 04 05 14 15 24 25 34 35
|
||||
punpckldq xmm2, xmm5 ; 04 05 06 07 14 15 16 17
|
||||
|
||||
punpckhdq xmm4, xmm5 ; 24 25 26 27 34 35 36 37
|
||||
movdqa xmm3, xmm1 ; 20 21 22 23 30 31 32 33
|
||||
|
||||
punpckhqdq xmm3, xmm4 ; 30 31 32 33 34 35 36 37
|
||||
punpcklqdq xmm1, xmm4 ; 20 21 22 23 24 25 26 27
|
||||
|
||||
movdqa xmm4, xmm0 ; 00 01 02 03 10 11 12 13
|
||||
punpcklqdq xmm0, xmm2 ; 00 01 02 03 04 05 06 07
|
||||
|
||||
punpckhqdq xmm4, xmm2 ; 10 11 12 13 14 15 16 17
|
||||
|
||||
; xmm0 0
|
||||
; xmm1 4
|
||||
; xmm2 1
|
||||
; xmm3 3
|
||||
|
||||
movdqa xmm5, xmm0
|
||||
movdqa xmm2, xmm1
|
||||
|
||||
paddw xmm0, xmm3 ; a1 = 0 + 3
|
||||
paddw xmm1, xmm4 ; b1 = 1 + 2
|
||||
|
||||
psubw xmm4, xmm2 ; c1 = 1 - 2
|
||||
psubw xmm5, xmm3 ; d1 = 0 - 3
|
||||
|
||||
pxor xmm6, xmm6 ; zero out for compare
|
||||
|
||||
pcmpeqw xmm6, xmm5 ; d1 != 0
|
||||
|
||||
pandn xmm6, XMMWORD PTR[GLOBAL(_cmp_mask8x4)] ; clear upper,
|
||||
; and keep bit 0 of lower
|
||||
|
||||
; output 0 and 2
|
||||
movdqa xmm2, xmm0 ; a1
|
||||
|
||||
paddw xmm0, xmm1 ; a1 + b1
|
||||
psubw xmm2, xmm1 ; a1 - b1
|
||||
|
||||
paddw xmm0, XMMWORD PTR[GLOBAL(_7w)]
|
||||
paddw xmm2, XMMWORD PTR[GLOBAL(_7w)]
|
||||
|
||||
psraw xmm0, 4 ; op[0] = (a1 + b1 + 7)>>4
|
||||
psraw xmm2, 4 ; op[8] = (a1 - b1 + 7)>>4
|
||||
|
||||
; output 1 and 3
|
||||
; interleave c1, d1
|
||||
movdqa xmm1, xmm5 ; d1
|
||||
punpcklwd xmm1, xmm4 ; c1 d1
|
||||
punpckhwd xmm5, xmm4 ; c1 d1
|
||||
|
||||
movdqa xmm3, xmm1
|
||||
movdqa xmm4, xmm5
|
||||
|
||||
pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
|
||||
pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
|
||||
paddd xmm1, XMMWORD PTR[GLOBAL(_12000)]
|
||||
paddd xmm4, XMMWORD PTR[GLOBAL(_12000)]
|
||||
paddd xmm3, XMMWORD PTR[GLOBAL(_51000)]
|
||||
paddd xmm5, XMMWORD PTR[GLOBAL(_51000)]
|
||||
|
||||
psrad xmm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16
|
||||
psrad xmm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16
|
||||
psrad xmm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16
|
||||
psrad xmm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16
|
||||
|
||||
packssdw xmm1, xmm4 ; op[4]
|
||||
packssdw xmm3, xmm5 ; op[12]
|
||||
|
||||
paddw xmm1, xmm6 ; op[4] += (d1!=0)
|
||||
|
||||
movdqa xmm4, xmm0
|
||||
movdqa xmm5, xmm2
|
||||
|
||||
punpcklqdq xmm0, xmm1
|
||||
punpckhqdq xmm4, xmm1
|
||||
|
||||
punpcklqdq xmm2, xmm3
|
||||
punpckhqdq xmm5, xmm3
|
||||
|
||||
movdqa XMMWORD PTR[output + 0 ], xmm0
|
||||
movdqa XMMWORD PTR[output + 16], xmm2
|
||||
movdqa XMMWORD PTR[output + 32], xmm4
|
||||
movdqa XMMWORD PTR[output + 48], xmm5
|
||||
|
||||
STACK_FRAME_DESTROY
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
;; RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
@@ -397,9 +161,7 @@ align 16
|
||||
_cmp_mask:
|
||||
times 4 dw 1
|
||||
times 4 dw 0
|
||||
align 16
|
||||
_cmp_mask8x4:
|
||||
times 8 dw 1
|
||||
|
||||
align 16
|
||||
_mult_sub:
|
||||
dw 1
|
||||
@@ -414,9 +176,6 @@ align 16
|
||||
_7:
|
||||
times 4 dd 7
|
||||
align 16
|
||||
_7w:
|
||||
times 8 dw 7
|
||||
align 16
|
||||
_14500:
|
||||
times 4 dd 14500
|
||||
align 16
|
||||
|
||||
@@ -24,31 +24,33 @@ extern prototype_fdct(vp8_short_fdct4x4_mmx);
|
||||
extern prototype_fdct(vp8_short_fdct8x4_mmx);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
#if 0
|
||||
#undef vp8_fdct_short4x4
|
||||
#define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx
|
||||
|
||||
#undef vp8_fdct_short8x4
|
||||
#define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if HAVE_SSE2
|
||||
extern prototype_fdct(vp8_short_fdct8x4_sse2);
|
||||
extern prototype_fdct(vp8_short_fdct8x4_wmt);
|
||||
extern prototype_fdct(vp8_short_walsh4x4_sse2);
|
||||
|
||||
extern prototype_fdct(vp8_short_fdct4x4_sse2);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
#if 1
|
||||
/* short SSE2 DCT currently disabled, does not match the MMX version */
|
||||
#undef vp8_fdct_short4x4
|
||||
#define vp8_fdct_short4x4 vp8_short_fdct4x4_sse2
|
||||
|
||||
#undef vp8_fdct_short8x4
|
||||
#define vp8_fdct_short8x4 vp8_short_fdct8x4_sse2
|
||||
#endif
|
||||
|
||||
#undef vp8_fdct_fast4x4
|
||||
#define vp8_fdct_fast4x4 vp8_short_fdct4x4_sse2
|
||||
@@ -56,7 +58,7 @@ extern prototype_fdct(vp8_short_fdct4x4_sse2);
|
||||
#undef vp8_fdct_fast8x4
|
||||
#define vp8_fdct_fast8x4 vp8_short_fdct8x4_sse2
|
||||
|
||||
#undef vp8_fdct_walsh_short4x4
|
||||
#undef vp8_fdct_walsh_short4x4
|
||||
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_sse2
|
||||
|
||||
#endif
|
||||
|
||||
@@ -24,14 +24,5 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE4_1
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
#undef vp8_search_full_search
|
||||
#define vp8_search_full_search vp8_full_search_sadx8
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
298
vp8/encoder/x86/preproc_mmx.c
Normal file
298
vp8/encoder/x86/preproc_mmx.c
Normal file
@@ -0,0 +1,298 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "memory.h"
|
||||
#include "preproc.h"
|
||||
#include "pragmas.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Macros
|
||||
****************************************************************************/
|
||||
#define FRAMECOUNT 7
|
||||
#define ROUNDUP32(X) ( ( ( (unsigned long) X ) + 31 )&( 0xFFFFFFE0 ) )
|
||||
|
||||
/****************************************************************************
|
||||
* Imports
|
||||
****************************************************************************/
|
||||
extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
|
||||
|
||||
/****************************************************************************
|
||||
* Exported Global Variables
|
||||
****************************************************************************/
|
||||
void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : temp_filter_wmt
|
||||
*
|
||||
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
|
||||
* unsigned char *s : Pointer to source frame.
|
||||
* unsigned char *d : Pointer to destination frame.
|
||||
* int bytes : Number of bytes to filter.
|
||||
* int strength : Strength of filter to apply.
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Performs a closesness adjusted temporarl blur
|
||||
*
|
||||
* SPECIAL NOTES : Destination frame can be same as source frame.
|
||||
*
|
||||
****************************************************************************/
|
||||
void temp_filter_wmt
|
||||
(
|
||||
pre_proc_instance *ppi,
|
||||
unsigned char *s,
|
||||
unsigned char *d,
|
||||
int bytes,
|
||||
int strength
|
||||
)
|
||||
{
|
||||
int byte = 0;
|
||||
unsigned char *frameptr = ppi->frame_buffer;
|
||||
|
||||
__declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3, 3, 3, 3, 3};
|
||||
__declspec(align(16)) unsigned short sixteens[] = {16, 16, 16, 16, 16, 16, 16, 16};
|
||||
|
||||
if (ppi->frame == 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
int i;
|
||||
int frame = 0;
|
||||
|
||||
do
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
*frameptr = s[byte+i];
|
||||
++frameptr;
|
||||
}
|
||||
|
||||
++frame;
|
||||
}
|
||||
while (frame < FRAMECOUNT);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
d[byte+i] = s[byte+i];
|
||||
|
||||
byte += 8;
|
||||
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int offset2 = (ppi->frame % FRAMECOUNT);
|
||||
|
||||
do
|
||||
{
|
||||
__declspec(align(16)) unsigned short counts[8];
|
||||
__declspec(align(16)) unsigned short sums[8];
|
||||
__asm
|
||||
{
|
||||
mov eax, offset2
|
||||
mov edi, s // source pixels
|
||||
pxor xmm1, xmm1 // accumulator
|
||||
|
||||
pxor xmm7, xmm7
|
||||
|
||||
mov esi, frameptr // accumulator
|
||||
pxor xmm2, xmm2 // count
|
||||
|
||||
movq xmm3, QWORD PTR [edi]
|
||||
|
||||
movq QWORD PTR [esi+8*eax], xmm3
|
||||
|
||||
punpcklbw xmm3, xmm2 // xmm3 source pixels
|
||||
mov ecx, FRAMECOUNT
|
||||
|
||||
next_frame:
|
||||
movq xmm4, QWORD PTR [esi] // get frame buffer values
|
||||
punpcklbw xmm4, xmm7 // xmm4 frame buffer pixels
|
||||
movdqa xmm6, xmm4 // save the pixel values
|
||||
psubsw xmm4, xmm3 // subtracted pixel values
|
||||
pmullw xmm4, xmm4 // square xmm4
|
||||
movd xmm5, strength
|
||||
psrlw xmm4, xmm5 // should be strength
|
||||
pmullw xmm4, threes // 3 * modifier
|
||||
movdqa xmm5, sixteens // 16s
|
||||
psubusw xmm5, xmm4 // 16 - modifiers
|
||||
movdqa xmm4, xmm5 // save the modifiers
|
||||
pmullw xmm4, xmm6 // multiplier values
|
||||
paddusw xmm1, xmm4 // accumulator
|
||||
paddusw xmm2, xmm5 // count
|
||||
add esi, 8 // next frame
|
||||
dec ecx // next set of eight pixels
|
||||
jnz next_frame
|
||||
|
||||
movdqa counts, xmm2
|
||||
psrlw xmm2, 1 // divide count by 2 for rounding
|
||||
paddusw xmm1, xmm2 // rounding added in
|
||||
|
||||
mov frameptr, esi
|
||||
|
||||
movdqa sums, xmm1
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
int blurvalue = sums[i] * ppi->fixed_divide[counts[i]];
|
||||
blurvalue >>= 16;
|
||||
d[i] = blurvalue;
|
||||
}
|
||||
|
||||
s += 8;
|
||||
d += 8;
|
||||
byte += 8;
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
|
||||
++ppi->frame;
|
||||
__asm emms
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : temp_filter_mmx
|
||||
*
|
||||
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
|
||||
* unsigned char *s : Pointer to source frame.
|
||||
* unsigned char *d : Pointer to destination frame.
|
||||
* int bytes : Number of bytes to filter.
|
||||
* int strength : Strength of filter to apply.
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Performs a closesness adjusted temporarl blur
|
||||
*
|
||||
* SPECIAL NOTES : Destination frame can be same as source frame.
|
||||
*
|
||||
****************************************************************************/
|
||||
void temp_filter_mmx
|
||||
(
|
||||
pre_proc_instance *ppi,
|
||||
unsigned char *s,
|
||||
unsigned char *d,
|
||||
int bytes,
|
||||
int strength
|
||||
)
|
||||
{
|
||||
int byte = 0;
|
||||
unsigned char *frameptr = ppi->frame_buffer;
|
||||
|
||||
__declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3};
|
||||
__declspec(align(16)) unsigned short sixteens[] = {16, 16, 16, 16};
|
||||
|
||||
if (ppi->frame == 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
int i;
|
||||
int frame = 0;
|
||||
|
||||
do
|
||||
{
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
*frameptr = s[byte+i];
|
||||
++frameptr;
|
||||
}
|
||||
|
||||
++frame;
|
||||
}
|
||||
while (frame < FRAMECOUNT);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
d[byte+i] = s[byte+i];
|
||||
|
||||
byte += 4;
|
||||
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int offset2 = (ppi->frame % FRAMECOUNT);
|
||||
|
||||
do
|
||||
{
|
||||
__declspec(align(16)) unsigned short counts[8];
|
||||
__declspec(align(16)) unsigned short sums[8];
|
||||
__asm
|
||||
{
|
||||
|
||||
mov eax, offset2
|
||||
mov edi, s // source pixels
|
||||
pxor mm1, mm1 // accumulator
|
||||
pxor mm7, mm7
|
||||
|
||||
mov esi, frameptr // accumulator
|
||||
pxor mm2, mm2 // count
|
||||
|
||||
movd mm3, DWORD PTR [edi]
|
||||
movd DWORD PTR [esi+4*eax], mm3
|
||||
|
||||
punpcklbw mm3, mm2 // mm3 source pixels
|
||||
mov ecx, FRAMECOUNT
|
||||
|
||||
next_frame:
|
||||
movd mm4, DWORD PTR [esi] // get frame buffer values
|
||||
punpcklbw mm4, mm7 // mm4 frame buffer pixels
|
||||
movq mm6, mm4 // save the pixel values
|
||||
psubsw mm4, mm3 // subtracted pixel values
|
||||
pmullw mm4, mm4 // square mm4
|
||||
movd mm5, strength
|
||||
psrlw mm4, mm5 // should be strength
|
||||
pmullw mm4, threes // 3 * modifier
|
||||
movq mm5, sixteens // 16s
|
||||
psubusw mm5, mm4 // 16 - modifiers
|
||||
movq mm4, mm5 // save the modifiers
|
||||
pmullw mm4, mm6 // multiplier values
|
||||
paddusw mm1, mm4 // accumulator
|
||||
paddusw mm2, mm5 // count
|
||||
add esi, 4 // next frame
|
||||
dec ecx // next set of eight pixels
|
||||
jnz next_frame
|
||||
|
||||
movq counts, mm2
|
||||
psrlw mm2, 1 // divide count by 2 for rounding
|
||||
paddusw mm1, mm2 // rounding added in
|
||||
|
||||
mov frameptr, esi
|
||||
|
||||
movq sums, mm1
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
int blurvalue = sums[i] * ppi->fixed_divide[counts[i]];
|
||||
blurvalue >>= 16;
|
||||
d[i] = blurvalue;
|
||||
}
|
||||
|
||||
s += 4;
|
||||
d += 4;
|
||||
byte += 4;
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
|
||||
++ppi->frame;
|
||||
__asm emms
|
||||
}
|
||||
@@ -253,9 +253,10 @@ rq_zigzag_1c:
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
|
||||
; short *qcoeff_ptr,short *dequant_ptr,
|
||||
; short *inv_scan_order, short *round_ptr,
|
||||
; short *scan_mask, short *round_ptr,
|
||||
; short *quant_ptr, short *dqcoeff_ptr);
|
||||
global sym(vp8_fast_quantize_b_impl_sse2)
|
||||
sym(vp8_fast_quantize_b_impl_sse2):
|
||||
@@ -264,18 +265,32 @@ sym(vp8_fast_quantize_b_impl_sse2):
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
; end prolog
|
||||
|
||||
ALIGN_STACK 16, rax
|
||||
|
||||
%define save_xmm6 0
|
||||
%define save_xmm7 16
|
||||
|
||||
%define vp8_fastquantizeb_stack_size save_xmm7 + 16
|
||||
|
||||
sub rsp, vp8_fastquantizeb_stack_size
|
||||
|
||||
movdqa XMMWORD PTR[rsp + save_xmm6], xmm6
|
||||
movdqa XMMWORD PTR[rsp + save_xmm7], xmm7
|
||||
|
||||
mov rdx, arg(0) ;coeff_ptr
|
||||
mov rcx, arg(2) ;dequant_ptr
|
||||
mov rax, arg(3) ;scan_mask
|
||||
mov rdi, arg(4) ;round_ptr
|
||||
mov rsi, arg(5) ;quant_ptr
|
||||
|
||||
movdqa xmm0, XMMWORD PTR[rdx]
|
||||
movdqa xmm4, XMMWORD PTR[rdx + 16]
|
||||
|
||||
movdqa xmm2, XMMWORD PTR[rdi] ;round lo
|
||||
movdqa xmm3, XMMWORD PTR[rdi + 16] ;round hi
|
||||
movdqa xmm6, XMMWORD PTR[rdi] ;round lo
|
||||
movdqa xmm7, XMMWORD PTR[rdi + 16] ;round hi
|
||||
|
||||
movdqa xmm1, xmm0
|
||||
movdqa xmm5, xmm4
|
||||
@@ -288,8 +303,8 @@ sym(vp8_fast_quantize_b_impl_sse2):
|
||||
psubw xmm1, xmm0 ;x = abs(z)
|
||||
psubw xmm5, xmm4 ;x = abs(z)
|
||||
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm5, xmm3
|
||||
paddw xmm1, xmm6
|
||||
paddw xmm5, xmm7
|
||||
|
||||
pmulhw xmm1, XMMWORD PTR[rsi]
|
||||
pmulhw xmm5, XMMWORD PTR[rsi + 16]
|
||||
@@ -297,8 +312,8 @@ sym(vp8_fast_quantize_b_impl_sse2):
|
||||
mov rdi, arg(1) ;qcoeff_ptr
|
||||
mov rsi, arg(6) ;dqcoeff_ptr
|
||||
|
||||
movdqa xmm2, XMMWORD PTR[rcx]
|
||||
movdqa xmm3, XMMWORD PTR[rcx + 16]
|
||||
movdqa xmm6, XMMWORD PTR[rcx]
|
||||
movdqa xmm7, XMMWORD PTR[rcx + 16]
|
||||
|
||||
pxor xmm1, xmm0
|
||||
pxor xmm5, xmm4
|
||||
@@ -308,47 +323,64 @@ sym(vp8_fast_quantize_b_impl_sse2):
|
||||
movdqa XMMWORD PTR[rdi], xmm1
|
||||
movdqa XMMWORD PTR[rdi + 16], xmm5
|
||||
|
||||
pmullw xmm2, xmm1
|
||||
pmullw xmm3, xmm5
|
||||
pmullw xmm6, xmm1
|
||||
pmullw xmm7, xmm5
|
||||
|
||||
mov rdi, arg(3) ;inv_scan_order
|
||||
movdqa xmm2, XMMWORD PTR[rax]
|
||||
movdqa xmm3, XMMWORD PTR[rax+16];
|
||||
|
||||
; Start with 16
|
||||
pxor xmm4, xmm4 ;clear all bits
|
||||
pxor xmm4, xmm4 ;clear all bits
|
||||
pcmpeqw xmm1, xmm4
|
||||
pcmpeqw xmm5, xmm4
|
||||
|
||||
pcmpeqw xmm4, xmm4 ;set all bits
|
||||
pcmpeqw xmm4, xmm4 ;set all bits
|
||||
pxor xmm1, xmm4
|
||||
pxor xmm5, xmm4
|
||||
|
||||
pand xmm1, XMMWORD PTR[rdi]
|
||||
pand xmm5, XMMWORD PTR[rdi+16]
|
||||
psrlw xmm1, 15
|
||||
psrlw xmm5, 15
|
||||
|
||||
pmaxsw xmm1, xmm5
|
||||
pmaddwd xmm1, xmm2
|
||||
pmaddwd xmm5, xmm3
|
||||
|
||||
; now down to 8
|
||||
pshufd xmm5, xmm1, 00001110b
|
||||
movq xmm2, xmm1
|
||||
movq xmm3, xmm5
|
||||
|
||||
pmaxsw xmm1, xmm5
|
||||
psrldq xmm1, 8
|
||||
psrldq xmm5, 8
|
||||
|
||||
; only 4 left
|
||||
pshuflw xmm5, xmm1, 00001110b
|
||||
paddd xmm1, xmm5
|
||||
paddd xmm2, xmm3
|
||||
|
||||
pmaxsw xmm1, xmm5
|
||||
paddd xmm1, xmm2
|
||||
movq xmm5, xmm1
|
||||
|
||||
; okay, just 2!
|
||||
pshuflw xmm5, xmm1, 00000001b
|
||||
psrldq xmm1, 4
|
||||
paddd xmm5, xmm1
|
||||
|
||||
pmaxsw xmm1, xmm5
|
||||
movq rcx, xmm5
|
||||
and rcx, 0xffff
|
||||
|
||||
movd rax, xmm1
|
||||
and rax, 0xff
|
||||
xor rdx, rdx
|
||||
sub rdx, rcx
|
||||
|
||||
movdqa XMMWORD PTR[rsi], xmm2 ;store dqcoeff
|
||||
movdqa XMMWORD PTR[rsi + 16], xmm3 ;store dqcoeff
|
||||
bsr rax, rcx
|
||||
inc rax
|
||||
|
||||
sar rdx, 31
|
||||
and rax, rdx
|
||||
|
||||
movdqa XMMWORD PTR[rsi], xmm6 ;store dqcoeff
|
||||
movdqa XMMWORD PTR[rsi + 16], xmm7 ;store dqcoeff
|
||||
|
||||
movdqa xmm6, XMMWORD PTR[rsp + save_xmm6]
|
||||
movdqa xmm7, XMMWORD PTR[rsp + save_xmm7]
|
||||
|
||||
add rsp, vp8_fastquantizeb_stack_size
|
||||
pop rsp
|
||||
|
||||
; begin epilog
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
|
||||
@@ -1,114 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
|
||||
;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr
|
||||
; short *qcoeff_ptr,short *dequant_ptr,
|
||||
; short *round_ptr,
|
||||
; short *quant_ptr, short *dqcoeff_ptr);
|
||||
;
|
||||
global sym(vp8_fast_quantize_b_impl_ssse3)
|
||||
sym(vp8_fast_quantize_b_impl_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rdx, arg(0) ;coeff_ptr
|
||||
mov rdi, arg(3) ;round_ptr
|
||||
mov rsi, arg(4) ;quant_ptr
|
||||
|
||||
movdqa xmm0, [rdx]
|
||||
movdqa xmm4, [rdx + 16]
|
||||
|
||||
movdqa xmm2, [rdi] ;round lo
|
||||
movdqa xmm3, [rdi + 16] ;round hi
|
||||
|
||||
movdqa xmm1, xmm0
|
||||
movdqa xmm5, xmm4
|
||||
|
||||
psraw xmm0, 15 ;sign of z (aka sz)
|
||||
psraw xmm4, 15 ;sign of z (aka sz)
|
||||
|
||||
pabsw xmm1, xmm1
|
||||
pabsw xmm5, xmm5
|
||||
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm5, xmm3
|
||||
|
||||
pmulhw xmm1, [rsi]
|
||||
pmulhw xmm5, [rsi + 16]
|
||||
|
||||
mov rdi, arg(1) ;qcoeff_ptr
|
||||
mov rcx, arg(2) ;dequant_ptr
|
||||
mov rsi, arg(5) ;dqcoeff_ptr
|
||||
|
||||
pxor xmm1, xmm0
|
||||
pxor xmm5, xmm4
|
||||
psubw xmm1, xmm0
|
||||
psubw xmm5, xmm4
|
||||
|
||||
movdqa [rdi], xmm1
|
||||
movdqa [rdi + 16], xmm5
|
||||
|
||||
movdqa xmm2, [rcx]
|
||||
movdqa xmm3, [rcx + 16]
|
||||
|
||||
pxor xmm4, xmm4
|
||||
pmullw xmm2, xmm1
|
||||
pmullw xmm3, xmm5
|
||||
|
||||
pcmpeqw xmm1, xmm4 ;non zero mask
|
||||
pcmpeqw xmm5, xmm4 ;non zero mask
|
||||
packsswb xmm1, xmm5
|
||||
pshufb xmm1, [ GLOBAL(zz_shuf)]
|
||||
|
||||
pmovmskb edx, xmm1
|
||||
|
||||
; xor ecx, ecx
|
||||
; mov eax, -1
|
||||
;find_eob_loop:
|
||||
; shr edx, 1
|
||||
; jc fq_skip
|
||||
; mov eax, ecx
|
||||
;fq_skip:
|
||||
; inc ecx
|
||||
; cmp ecx, 16
|
||||
; jne find_eob_loop
|
||||
xor rdi, rdi
|
||||
mov eax, -1
|
||||
xor dx, ax ;flip the bits for bsr
|
||||
bsr eax, edx
|
||||
|
||||
movdqa [rsi], xmm2 ;store dqcoeff
|
||||
movdqa [rsi + 16], xmm3 ;store dqcoeff
|
||||
|
||||
sub edi, edx ;check for all zeros in bit mask
|
||||
sar edi, 31 ;0 or -1
|
||||
add eax, 1
|
||||
and eax, edi ;if the bit mask was all zero,
|
||||
;then eob = 0
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
zz_shuf:
|
||||
db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,353 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%macro PROCESS_16X2X8 1
|
||||
%if %1
|
||||
movdqa xmm0, XMMWORD PTR [rsi]
|
||||
movq xmm1, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
movq xmm2, MMWORD PTR [rdi+16]
|
||||
punpcklqdq xmm1, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
mpsadbw xmm1, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
psrldq xmm0, 8
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm1, xmm3
|
||||
paddw xmm1, xmm4
|
||||
%else
|
||||
movdqa xmm0, XMMWORD PTR [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
movq xmm2, MMWORD PTR [rdi+16]
|
||||
punpcklqdq xmm5, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
psrldq xmm0, 8
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm5, xmm3
|
||||
paddw xmm5, xmm4
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endif
|
||||
movdqa xmm0, XMMWORD PTR [rsi + rax]
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx]
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8]
|
||||
movq xmm2, MMWORD PTR [rdi+ rdx+16]
|
||||
punpcklqdq xmm5, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
psrldq xmm0, 8
|
||||
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm5, xmm3
|
||||
paddw xmm5, xmm4
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
%macro PROCESS_8X2X8 1
|
||||
%if %1
|
||||
movq xmm0, MMWORD PTR [rsi]
|
||||
movq xmm1, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm1, xmm3
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
mpsadbw xmm1, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm1, xmm2
|
||||
%else
|
||||
movq xmm0, MMWORD PTR [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm5, xmm2
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endif
|
||||
movq xmm0, MMWORD PTR [rsi + rax]
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx]
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm5, xmm2
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
%macro PROCESS_4X2X8 1
|
||||
%if %1
|
||||
movd xmm0, [rsi]
|
||||
movq xmm1, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm1, xmm3
|
||||
|
||||
mpsadbw xmm1, xmm0, 0x0
|
||||
%else
|
||||
movd xmm0, [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endif
|
||||
movd xmm0, [rsi + rax]
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx]
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
|
||||
;void vp8_sad16x16x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array);
|
||||
global sym(vp8_sad16x16x8_sse4)
|
||||
sym(vp8_sad16x16x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_sad16x8x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad16x8x8_sse4)
|
||||
sym(vp8_sad16x8x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_sad8x8x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad8x8x8_sse4)
|
||||
sym(vp8_sad8x8x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_sad8x16x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad8x16x8_sse4)
|
||||
sym(vp8_sad8x16x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_sad4x4x8_c(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad4x4x8_sse4)
|
||||
sym(vp8_sad4x4x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_4X2X8 1
|
||||
PROCESS_4X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -297,31 +297,4 @@ extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if HAVE_SSE4_1
|
||||
extern prototype_sad_multi_same_address_1(vp8_sad16x16x8_sse4);
|
||||
extern prototype_sad_multi_same_address_1(vp8_sad16x8x8_sse4);
|
||||
extern prototype_sad_multi_same_address_1(vp8_sad8x16x8_sse4);
|
||||
extern prototype_sad_multi_same_address_1(vp8_sad8x8x8_sse4);
|
||||
extern prototype_sad_multi_same_address_1(vp8_sad4x4x8_sse4);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_variance_sad16x16x8
|
||||
#define vp8_variance_sad16x16x8 vp8_sad16x16x8_sse4
|
||||
|
||||
#undef vp8_variance_sad16x8x8
|
||||
#define vp8_variance_sad16x8x8 vp8_sad16x8x8_sse4
|
||||
|
||||
#undef vp8_variance_sad8x16x8
|
||||
#define vp8_variance_sad8x16x8 vp8_sad8x16x8_sse4
|
||||
|
||||
#undef vp8_variance_sad8x8x8
|
||||
#define vp8_variance_sad8x8x8 vp8_sad8x8x8_sse4
|
||||
|
||||
#undef vp8_variance_sad4x4x8
|
||||
#define vp8_variance_sad4x4x8 vp8_sad4x4x8_sse4
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -18,10 +18,11 @@
|
||||
#if HAVE_MMX
|
||||
void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
|
||||
{
|
||||
vp8_short_fdct4x4_mmx(input, output, pitch);
|
||||
vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
|
||||
vp8_short_fdct4x4_c(input, output, pitch);
|
||||
vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
|
||||
}
|
||||
|
||||
|
||||
int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
|
||||
short *qcoeff_ptr, short *dequant_ptr,
|
||||
short *scan_mask, short *round_ptr,
|
||||
@@ -32,7 +33,7 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
|
||||
short *coeff_ptr = b->coeff;
|
||||
short *zbin_ptr = b->zbin;
|
||||
short *round_ptr = b->round;
|
||||
short *quant_ptr = b->quant_fast;
|
||||
short *quant_ptr = b->quant;
|
||||
short *qcoeff_ptr = d->qcoeff;
|
||||
short *dqcoeff_ptr = d->dqcoeff;
|
||||
short *dequant_ptr = d->dequant;
|
||||
@@ -81,16 +82,22 @@ void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2
|
||||
void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
|
||||
{
|
||||
vp8_short_fdct4x4_sse2(input, output, pitch);
|
||||
vp8_short_fdct4x4_sse2(input + 4, output + 16, pitch);
|
||||
}
|
||||
|
||||
int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
|
||||
short *qcoeff_ptr, short *dequant_ptr,
|
||||
const short *inv_scan_order, short *round_ptr,
|
||||
short *scan_mask, short *round_ptr,
|
||||
short *quant_ptr, short *dqcoeff_ptr);
|
||||
void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
|
||||
short *coeff_ptr = b->coeff;
|
||||
short *round_ptr = b->round;
|
||||
short *quant_ptr = b->quant_fast;
|
||||
short *quant_ptr = b->quant;
|
||||
short *qcoeff_ptr = d->qcoeff;
|
||||
short *dqcoeff_ptr = d->dqcoeff;
|
||||
short *dequant_ptr = d->dequant;
|
||||
@@ -99,7 +106,8 @@ void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
|
||||
coeff_ptr,
|
||||
qcoeff_ptr,
|
||||
dequant_ptr,
|
||||
vp8_default_inv_zig_zag,
|
||||
scan_mask,
|
||||
|
||||
round_ptr,
|
||||
quant_ptr,
|
||||
dqcoeff_ptr
|
||||
@@ -171,25 +179,6 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3
|
||||
int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
|
||||
short *qcoeff_ptr, short *dequant_ptr,
|
||||
short *round_ptr,
|
||||
short *quant_ptr, short *dqcoeff_ptr);
|
||||
void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
d->eob = vp8_fast_quantize_b_impl_ssse3(
|
||||
b->coeff,
|
||||
d->qcoeff,
|
||||
d->dequant,
|
||||
b->round,
|
||||
b->quant_fast,
|
||||
d->dqcoeff
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
@@ -199,7 +188,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
int wmt_enabled = flags & HAS_SSE2;
|
||||
int SSE3Enabled = flags & HAS_SSE3;
|
||||
int SSSE3Enabled = flags & HAS_SSSE3;
|
||||
int SSE4_1Enabled = flags & HAS_SSE4_1;
|
||||
|
||||
/* Note:
|
||||
*
|
||||
@@ -210,6 +198,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
|
||||
/* Override default functions with fastest ones for this CPU. */
|
||||
#if HAVE_MMX
|
||||
|
||||
if (mmx_enabled)
|
||||
{
|
||||
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx;
|
||||
@@ -241,11 +230,18 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.get8x8var = vp8_get8x8var_mmx;
|
||||
cpi->rtcd.variance.get16x16var = vp8_get16x16var_mmx;
|
||||
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx;
|
||||
|
||||
#if 0 // new fdct
|
||||
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx;
|
||||
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx;
|
||||
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx;
|
||||
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx;
|
||||
#else
|
||||
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
|
||||
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
|
||||
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c;
|
||||
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_c;
|
||||
|
||||
#endif
|
||||
|
||||
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
|
||||
|
||||
@@ -258,9 +254,10 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
|
||||
/*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#if HAVE_SSE2
|
||||
|
||||
if (wmt_enabled)
|
||||
{
|
||||
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt;
|
||||
@@ -310,9 +307,10 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#if HAVE_SSE3
|
||||
|
||||
if (SSE3Enabled)
|
||||
{
|
||||
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3;
|
||||
@@ -330,30 +328,16 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3;
|
||||
cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#if HAVE_SSSE3
|
||||
|
||||
if (SSSE3Enabled)
|
||||
{
|
||||
cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
|
||||
cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
|
||||
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE4_1
|
||||
if (SSE4_1Enabled)
|
||||
{
|
||||
cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4;
|
||||
cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4;
|
||||
cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4;
|
||||
cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4;
|
||||
cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4;
|
||||
cpi->rtcd.search.full_search = vp8_full_search_sadx8;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ VP8_COMMON_SRCS-yes += common/type_aliases.h
|
||||
VP8_COMMON_SRCS-yes += common/pragmas.h
|
||||
|
||||
CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
|
||||
VP8_COMMON_SRCS-yes += common/preproc.h
|
||||
VP8_COMMON_SRCS-yes += common/vpxerrors.h
|
||||
|
||||
CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
|
||||
|
||||
@@ -37,7 +37,6 @@ struct vp8_extracfg
|
||||
unsigned int arnr_max_frames; /* alt_ref Noise Reduction Max Frame Count */
|
||||
unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */
|
||||
unsigned int arnr_type; /* alt_ref filter type */
|
||||
vp8e_tuning tuning;
|
||||
|
||||
};
|
||||
|
||||
@@ -68,7 +67,6 @@ static const struct extraconfig_map extracfg_map[] =
|
||||
0, /* arnr_max_frames */
|
||||
3, /* arnr_strength */
|
||||
3, /* arnr_type*/
|
||||
0, /* tuning*/
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -106,7 +104,6 @@ update_error_state(vpx_codec_alg_priv_t *ctx,
|
||||
}
|
||||
|
||||
|
||||
#undef ERROR
|
||||
#define ERROR(str) do {\
|
||||
ctx->base.err_detail = str;\
|
||||
return VPX_CODEC_INVALID_PARAM;\
|
||||
@@ -135,8 +132,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
const vpx_codec_enc_cfg_t *cfg,
|
||||
const struct vp8_extracfg *vp8_cfg)
|
||||
{
|
||||
RANGE_CHECK(cfg, g_w, 1, 16384);
|
||||
RANGE_CHECK(cfg, g_h, 1, 16384);
|
||||
RANGE_CHECK(cfg, g_w, 2, 16384);
|
||||
RANGE_CHECK(cfg, g_h, 2, 16384);
|
||||
RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
|
||||
RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
|
||||
RANGE_CHECK_HI(cfg, g_profile, 3);
|
||||
@@ -338,7 +335,6 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
|
||||
oxcf->arnr_strength = vp8_cfg.arnr_strength;
|
||||
oxcf->arnr_type = vp8_cfg.arnr_type;
|
||||
|
||||
oxcf->tuning = vp8_cfg.tuning;
|
||||
|
||||
/*
|
||||
printf("Current VP8 Settings: \n");
|
||||
@@ -452,7 +448,6 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx,
|
||||
MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames);
|
||||
MAP(VP8E_SET_ARNR_STRENGTH , xcfg.arnr_strength);
|
||||
MAP(VP8E_SET_ARNR_TYPE , xcfg.arnr_type);
|
||||
MAP(VP8E_SET_TUNING, xcfg.tuning);
|
||||
|
||||
}
|
||||
|
||||
@@ -865,16 +860,8 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
|
||||
{
|
||||
|
||||
YV12_BUFFER_CONFIG sd;
|
||||
vp8_ppflags_t flags = {0};
|
||||
|
||||
if (ctx->preview_ppcfg.post_proc_flag)
|
||||
{
|
||||
flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag;
|
||||
flags.deblocking_level = ctx->preview_ppcfg.deblocking_level;
|
||||
flags.noise_level = ctx->preview_ppcfg.noise_level;
|
||||
}
|
||||
|
||||
if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, &flags))
|
||||
if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, ctx->preview_ppcfg.deblocking_level, ctx->preview_ppcfg.noise_level, ctx->preview_ppcfg.post_proc_flag))
|
||||
{
|
||||
|
||||
/*
|
||||
@@ -1033,7 +1020,6 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] =
|
||||
{VP8E_SET_ARNR_MAXFRAMES, set_param},
|
||||
{VP8E_SET_ARNR_STRENGTH , set_param},
|
||||
{VP8E_SET_ARNR_TYPE , set_param},
|
||||
{VP8E_SET_TUNING, set_param},
|
||||
{ -1, NULL},
|
||||
};
|
||||
|
||||
|
||||
@@ -65,19 +65,12 @@ struct vpx_codec_alg_priv
|
||||
vpx_codec_priv_t base;
|
||||
vpx_codec_mmap_t mmaps[NELEMENTS(vp8_mem_req_segs)-1];
|
||||
vpx_codec_dec_cfg_t cfg;
|
||||
vp8_stream_info_t si;
|
||||
vp8_stream_info_t si;
|
||||
int defer_alloc;
|
||||
int decoder_init;
|
||||
VP8D_PTR pbi;
|
||||
int postproc_cfg_set;
|
||||
vp8_postproc_cfg_t postproc_cfg;
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
unsigned int dbg_postproc_flag;
|
||||
int dbg_color_ref_frame_flag;
|
||||
int dbg_color_mb_modes_flag;
|
||||
int dbg_color_b_modes_flag;
|
||||
int dbg_display_mv_flag;
|
||||
#endif
|
||||
vpx_image_t img;
|
||||
int img_setup;
|
||||
int img_avail;
|
||||
@@ -423,27 +416,15 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
|
||||
{
|
||||
YV12_BUFFER_CONFIG sd;
|
||||
INT64 time_stamp = 0, time_end_stamp = 0;
|
||||
vp8_ppflags_t flags = {0};
|
||||
int ppflag = 0;
|
||||
int ppdeblocking = 0;
|
||||
int ppnoise = 0;
|
||||
|
||||
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
|
||||
{
|
||||
flags.post_proc_flag= ctx->postproc_cfg.post_proc_flag
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
|
||||
| ((ctx->dbg_color_ref_frame_flag != 0) ? VP8D_DEBUG_CLR_FRM_REF_BLKS : 0)
|
||||
| ((ctx->dbg_color_mb_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0)
|
||||
| ((ctx->dbg_color_b_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0)
|
||||
| ((ctx->dbg_display_mv_flag != 0) ? VP8D_DEBUG_DRAW_MV : 0)
|
||||
#endif
|
||||
;
|
||||
flags.deblocking_level = ctx->postproc_cfg.deblocking_level;
|
||||
flags.noise_level = ctx->postproc_cfg.noise_level;
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
flags.display_ref_frame_flag= ctx->dbg_color_ref_frame_flag;
|
||||
flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag;
|
||||
flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag;
|
||||
flags.display_mv_flag = ctx->dbg_display_mv_flag;
|
||||
#endif
|
||||
ppflag = ctx->postproc_cfg.post_proc_flag;
|
||||
ppdeblocking = ctx->postproc_cfg.deblocking_level;
|
||||
ppnoise = ctx->postproc_cfg.noise_level;
|
||||
}
|
||||
|
||||
if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline))
|
||||
@@ -452,7 +433,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
|
||||
res = update_error_state(ctx, &pbi->common.error);
|
||||
}
|
||||
|
||||
if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags))
|
||||
if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, ppdeblocking, ppnoise, ppflag))
|
||||
{
|
||||
/* Align width/height */
|
||||
unsigned int a_w = (sd.y_width + 15) & ~15;
|
||||
@@ -466,7 +447,6 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
|
||||
vpx_img_set_rect(&ctx->img,
|
||||
VP8BORDERINPIXELS, VP8BORDERINPIXELS,
|
||||
sd.y_width, sd.y_height);
|
||||
ctx->img.user_priv = user_priv;
|
||||
ctx->img_avail = 1;
|
||||
|
||||
}
|
||||
@@ -666,59 +646,12 @@ static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx,
|
||||
#endif
|
||||
}
|
||||
|
||||
static vpx_codec_err_t vp8_set_dbg_options(vpx_codec_alg_priv_t *ctx,
|
||||
int ctrl_id,
|
||||
va_list args)
|
||||
{
|
||||
#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC
|
||||
int data = va_arg(args, int);
|
||||
|
||||
#define MAP(id, var) case id: var = data; break;
|
||||
|
||||
switch (ctrl_id)
|
||||
{
|
||||
MAP (VP8_SET_DBG_COLOR_REF_FRAME, ctx->dbg_color_ref_frame_flag);
|
||||
MAP (VP8_SET_DBG_COLOR_MB_MODES, ctx->dbg_color_mb_modes_flag);
|
||||
MAP (VP8_SET_DBG_COLOR_B_MODES, ctx->dbg_color_b_modes_flag);
|
||||
MAP (VP8_SET_DBG_DISPLAY_MV, ctx->dbg_display_mv_flag);
|
||||
}
|
||||
|
||||
return VPX_CODEC_OK;
|
||||
#else
|
||||
return VPX_CODEC_INCAPABLE;
|
||||
#endif
|
||||
}
|
||||
|
||||
static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
|
||||
int ctrl_id,
|
||||
va_list args)
|
||||
{
|
||||
int *update_info = va_arg(args, int *);
|
||||
VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi;
|
||||
|
||||
if (update_info)
|
||||
{
|
||||
*update_info = pbi->common.refresh_alt_ref_frame * (int) VP8_ALTR_FRAME
|
||||
+ pbi->common.refresh_golden_frame * (int) VP8_GOLD_FRAME
|
||||
+ pbi->common.refresh_last_frame * (int) VP8_LAST_FRAME;
|
||||
|
||||
return VPX_CODEC_OK;
|
||||
}
|
||||
else
|
||||
return VPX_CODEC_INVALID_PARAM;
|
||||
}
|
||||
|
||||
|
||||
vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] =
|
||||
{
|
||||
{VP8_SET_REFERENCE, vp8_set_reference},
|
||||
{VP8_COPY_REFERENCE, vp8_get_reference},
|
||||
{VP8_SET_POSTPROC, vp8_set_postproc},
|
||||
{VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_options},
|
||||
{VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_options},
|
||||
{VP8_SET_DBG_COLOR_B_MODES, vp8_set_dbg_options},
|
||||
{VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_options},
|
||||
{VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates},
|
||||
{VP8_SET_REFERENCE, vp8_set_reference},
|
||||
{VP8_COPY_REFERENCE, vp8_get_reference},
|
||||
{VP8_SET_POSTPROC, vp8_set_postproc},
|
||||
{ -1, NULL},
|
||||
};
|
||||
|
||||
|
||||
@@ -109,8 +109,6 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
|
||||
|
||||
|
||||
34
vpx/vp8.h
34
vpx/vp8.h
@@ -38,13 +38,9 @@
|
||||
*/
|
||||
enum vp8_dec_control_id
|
||||
{
|
||||
VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */
|
||||
VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
|
||||
VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
|
||||
VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */
|
||||
VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
|
||||
VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */
|
||||
VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */
|
||||
VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */
|
||||
VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
|
||||
VP8_SET_POSTPROC = 3, /**< set decoder's the post processing settings */
|
||||
VP8_COMMON_CTRL_ID_MAX
|
||||
};
|
||||
|
||||
@@ -54,14 +50,10 @@ enum vp8_dec_control_id
|
||||
*/
|
||||
enum vp8_postproc_level
|
||||
{
|
||||
VP8_NOFILTERING = 0,
|
||||
VP8_DEBLOCK = 1<<0,
|
||||
VP8_DEMACROBLOCK = 1<<1,
|
||||
VP8_ADDNOISE = 1<<2,
|
||||
VP8_DEBUG_TXT_FRAME_INFO = 1<<3, /**< print frame information */
|
||||
VP8_DEBUG_TXT_MBLK_MODES = 1<<4, /**< print macro block modes over each macro block */
|
||||
VP8_DEBUG_TXT_DC_DIFF = 1<<5, /**< print dc diff for each macro block */
|
||||
VP8_DEBUG_TXT_RATE_INFO = 1<<6, /**< print video rate info (encoder only) */
|
||||
VP8_NOFILTERING = 0,
|
||||
VP8_DEBLOCK = 1,
|
||||
VP8_DEMACROBLOCK = 2,
|
||||
VP8_ADDNOISE = 4
|
||||
};
|
||||
|
||||
/*!\brief post process flags
|
||||
@@ -73,9 +65,9 @@ enum vp8_postproc_level
|
||||
|
||||
typedef struct vp8_postproc_cfg
|
||||
{
|
||||
int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */
|
||||
int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */
|
||||
int noise_level; /**< the strength of additive noise, valid range [0, 16] */
|
||||
int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */
|
||||
int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */
|
||||
int noise_level; /**< the strength of additive noise, valid range [0, 16] */
|
||||
} vp8_postproc_cfg_t;
|
||||
|
||||
/*!\brief reference frame type
|
||||
@@ -103,16 +95,12 @@ typedef struct vpx_ref_frame
|
||||
|
||||
/*!\brief vp8 decoder control funciton parameter type
|
||||
*
|
||||
* defines the data type for each of VP8 decoder control function requires
|
||||
* defines the data type for each of VP8 decoder control funciton requires
|
||||
*/
|
||||
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *)
|
||||
VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *)
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *)
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int)
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int)
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int)
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int)
|
||||
|
||||
|
||||
/*! @} - end defgroup vp8 */
|
||||
|
||||
17
vpx/vp8cx.h
17
vpx/vp8cx.h
@@ -140,8 +140,7 @@ enum vp8e_enc_control_id
|
||||
VP8E_SET_ARNR_MAXFRAMES, /**< control function to set the max number of frames blurred creating arf*/
|
||||
VP8E_SET_ARNR_STRENGTH , /**< control function to set the filter strength for the arf */
|
||||
VP8E_SET_ARNR_TYPE , /**< control function to set the type of filter to use for the arf*/
|
||||
VP8E_SET_TUNING, /**< control function to set visual tuning */
|
||||
};
|
||||
} ;
|
||||
|
||||
/*!\brief vpx 1-D scaling mode
|
||||
*
|
||||
@@ -225,18 +224,6 @@ typedef enum
|
||||
} vp8e_token_partitions;
|
||||
|
||||
|
||||
/*!\brief VP8 model tuning parameters
|
||||
*
|
||||
* Changes the encoder to tune for certain types of input material.
|
||||
*
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
VP8_TUNE_PSNR,
|
||||
VP8_TUNE_SSIM
|
||||
} vp8e_tuning;
|
||||
|
||||
|
||||
/*!\brief VP8 encoder control function parameter type
|
||||
*
|
||||
* Defines the data types that VP8E control functions take. Note that
|
||||
@@ -266,7 +253,7 @@ VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, vp8e_token_partitions)
|
||||
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int)
|
||||
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH , unsigned int)
|
||||
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE , unsigned int)
|
||||
VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, vp8e_tuning)
|
||||
|
||||
|
||||
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *)
|
||||
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *)
|
||||
|
||||
24
vpx/vp8dx.h
24
vpx/vp8dx.h
@@ -36,30 +36,6 @@ extern vpx_codec_iface_t* vpx_codec_vp8_dx(void);
|
||||
#include "vp8.h"
|
||||
|
||||
|
||||
/*!\brief VP8 decoder control functions
|
||||
*
|
||||
* The set of macros define the control functions of VP8 decoder interface
|
||||
*/
|
||||
enum vp8d_dec_control_id
|
||||
{
|
||||
VP8_DECODER_CTRL_ID_START = 256,
|
||||
VP8D_GET_LAST_REF_UPDATES, /**< control function to get info on which reference frames were updated
|
||||
by the last decode */
|
||||
VP8_DECODER_CTRL_ID_MAX
|
||||
} ;
|
||||
|
||||
|
||||
/*!\brief VP8 encoder control function parameter type
|
||||
*
|
||||
* Defines the data types that VP8E control functions take. Note that
|
||||
* additional common controls are defined in vp8.h
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *)
|
||||
|
||||
|
||||
/*! @} - end defgroup vp8_decoder */
|
||||
|
||||
|
||||
|
||||
@@ -74,7 +74,6 @@ void __cpuid(int CPUInfo[4], int info_type);
|
||||
#define HAS_SSE2 0x04
|
||||
#define HAS_SSE3 0x08
|
||||
#define HAS_SSSE3 0x10
|
||||
#define HAS_SSE4_1 0x20
|
||||
#ifndef BIT
|
||||
#define BIT(n) (1<<n)
|
||||
#endif
|
||||
@@ -118,8 +117,6 @@ x86_simd_caps(void)
|
||||
|
||||
if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
|
||||
|
||||
if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
|
||||
|
||||
return flags & mask;
|
||||
}
|
||||
|
||||
|
||||
94
vpxdec.c
94
vpxdec.c
@@ -35,7 +35,6 @@
|
||||
#if CONFIG_MD5
|
||||
#include "md5_utils.h"
|
||||
#endif
|
||||
#include "tools_common.h"
|
||||
#include "nestegg/include/nestegg/nestegg.h"
|
||||
|
||||
#ifndef PATH_MAX
|
||||
@@ -108,19 +107,11 @@ static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level",
|
||||
"Enable VP8 demacroblocking, w/ level");
|
||||
static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1,
|
||||
"Enable VP8 visible debug info");
|
||||
static const arg_def_t pp_disp_ref_frame = ARG_DEF(NULL, "pp-dbg-ref-frame", 1,
|
||||
"Display only selected reference frame per macro block");
|
||||
static const arg_def_t pp_disp_mb_modes = ARG_DEF(NULL, "pp-dbg-mb-modes", 1,
|
||||
"Display only selected macro block modes");
|
||||
static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1,
|
||||
"Display only selected block modes");
|
||||
static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1,
|
||||
"Draw only selected motion vectors");
|
||||
|
||||
|
||||
static const arg_def_t *vp8_pp_args[] =
|
||||
{
|
||||
&addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
|
||||
&pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs,
|
||||
NULL
|
||||
};
|
||||
#endif
|
||||
@@ -323,8 +314,7 @@ void *out_open(const char *out_fn, int do_md5)
|
||||
}
|
||||
else
|
||||
{
|
||||
FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb")
|
||||
: set_binary_mode(stdout);
|
||||
FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb") : stdout;
|
||||
|
||||
if (!outfile)
|
||||
{
|
||||
@@ -442,8 +432,6 @@ unsigned int file_is_raw(FILE *infile,
|
||||
int is_raw = 0;
|
||||
vpx_codec_stream_info_t si;
|
||||
|
||||
si.sz = sizeof(si);
|
||||
|
||||
if (fread(buf, 1, 32, infile) == 32)
|
||||
{
|
||||
int i;
|
||||
@@ -552,7 +540,6 @@ webm_guess_framerate(struct input_ctx *input,
|
||||
*fps_den = tstamp / 1000;
|
||||
return 0;
|
||||
fail:
|
||||
nestegg_destroy(input->nestegg_ctx);
|
||||
input->nestegg_ctx = NULL;
|
||||
rewind(input->infile);
|
||||
return 1;
|
||||
@@ -715,10 +702,6 @@ int main(int argc, const char **argv_)
|
||||
vpx_codec_dec_cfg_t cfg = {0};
|
||||
#if CONFIG_VP8_DECODER
|
||||
vp8_postproc_cfg_t vp8_pp_cfg = {0};
|
||||
int vp8_dbg_color_ref_frame = 0;
|
||||
int vp8_dbg_color_mb_modes = 0;
|
||||
int vp8_dbg_color_b_modes = 0;
|
||||
int vp8_dbg_display_mv = 0;
|
||||
#endif
|
||||
struct input_ctx input = {0};
|
||||
|
||||
@@ -804,42 +787,6 @@ int main(int argc, const char **argv_)
|
||||
if (level)
|
||||
vp8_pp_cfg.post_proc_flag |= level;
|
||||
}
|
||||
else if (arg_match(&arg, &pp_disp_ref_frame, argi))
|
||||
{
|
||||
unsigned int flags = arg_parse_int(&arg);
|
||||
if (flags)
|
||||
{
|
||||
postproc = 1;
|
||||
vp8_dbg_color_ref_frame = flags;
|
||||
}
|
||||
}
|
||||
else if (arg_match(&arg, &pp_disp_mb_modes, argi))
|
||||
{
|
||||
unsigned int flags = arg_parse_int(&arg);
|
||||
if (flags)
|
||||
{
|
||||
postproc = 1;
|
||||
vp8_dbg_color_mb_modes = flags;
|
||||
}
|
||||
}
|
||||
else if (arg_match(&arg, &pp_disp_b_modes, argi))
|
||||
{
|
||||
unsigned int flags = arg_parse_int(&arg);
|
||||
if (flags)
|
||||
{
|
||||
postproc = 1;
|
||||
vp8_dbg_color_b_modes = flags;
|
||||
}
|
||||
}
|
||||
else if (arg_match(&arg, &pp_disp_mvs, argi))
|
||||
{
|
||||
unsigned int flags = arg_parse_int(&arg);
|
||||
if (flags)
|
||||
{
|
||||
postproc = 1;
|
||||
vp8_dbg_display_mv = flags;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
else
|
||||
@@ -858,7 +805,7 @@ int main(int argc, const char **argv_)
|
||||
usage_exit();
|
||||
|
||||
/* Open file */
|
||||
infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin);
|
||||
infile = strcmp(fn, "-") ? fopen(fn, "rb") : stdin;
|
||||
|
||||
if (!infile)
|
||||
{
|
||||
@@ -929,13 +876,7 @@ int main(int argc, const char **argv_)
|
||||
}
|
||||
|
||||
if(input.kind == WEBM_FILE)
|
||||
if(webm_guess_framerate(&input, &fps_den, &fps_num))
|
||||
{
|
||||
fprintf(stderr, "Failed to guess framerate -- error parsing "
|
||||
"webm file?\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
webm_guess_framerate(&input, &fps_den, &fps_num);
|
||||
|
||||
/*Note: We can't output an aspect ratio here because IVF doesn't
|
||||
store one, and neither does VP8.
|
||||
@@ -979,33 +920,6 @@ int main(int argc, const char **argv_)
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_dbg_color_ref_frame
|
||||
&& vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame))
|
||||
{
|
||||
fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_dbg_color_mb_modes
|
||||
&& vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes))
|
||||
{
|
||||
fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_dbg_color_b_modes
|
||||
&& vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes))
|
||||
{
|
||||
fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_dbg_display_mv
|
||||
&& vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv))
|
||||
{
|
||||
fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Decode file */
|
||||
|
||||
107
vpxenc.c
Executable file → Normal file
107
vpxenc.c
Executable file → Normal file
@@ -35,11 +35,9 @@
|
||||
#include "vpx/vp8cx.h"
|
||||
#include "vpx_ports/mem_ops.h"
|
||||
#include "vpx_ports/vpx_timer.h"
|
||||
#include "tools_common.h"
|
||||
#include "y4minput.h"
|
||||
#include "libmkv/EbmlWriter.h"
|
||||
#include "libmkv/EbmlIDs.h"
|
||||
#include "experimental.h"
|
||||
|
||||
/* Need special handling of these functions on Windows */
|
||||
#if defined(_MSC_VER)
|
||||
@@ -187,11 +185,11 @@ int stats_open_mem(stats_io_t *stats, int pass)
|
||||
}
|
||||
|
||||
|
||||
void stats_close(stats_io_t *stats, int last_pass)
|
||||
void stats_close(stats_io_t *stats)
|
||||
{
|
||||
if (stats->file)
|
||||
{
|
||||
if (stats->pass == last_pass)
|
||||
if (stats->pass == 1)
|
||||
{
|
||||
#if 0
|
||||
#elif USE_POSIX_MMAP
|
||||
@@ -206,7 +204,7 @@ void stats_close(stats_io_t *stats, int last_pass)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (stats->pass == last_pass)
|
||||
if (stats->pass == 1)
|
||||
free(stats->buf.buf);
|
||||
}
|
||||
}
|
||||
@@ -252,8 +250,7 @@ enum video_file_type
|
||||
|
||||
struct detect_buffer {
|
||||
char buf[4];
|
||||
size_t buf_read;
|
||||
size_t position;
|
||||
int valid;
|
||||
};
|
||||
|
||||
|
||||
@@ -307,21 +304,14 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
|
||||
|
||||
for (r = 0; r < h; r++)
|
||||
{
|
||||
size_t needed = w;
|
||||
size_t buf_position = 0;
|
||||
const size_t left = detect->buf_read - detect->position;
|
||||
if (left > 0)
|
||||
if (detect->valid)
|
||||
{
|
||||
const size_t more = (left < needed) ? left : needed;
|
||||
memcpy(ptr, detect->buf + detect->position, more);
|
||||
buf_position = more;
|
||||
needed -= more;
|
||||
detect->position += more;
|
||||
}
|
||||
if (needed > 0)
|
||||
{
|
||||
shortread |= (fread(ptr + buf_position, 1, needed, f) < needed);
|
||||
memcpy(ptr, detect->buf, 4);
|
||||
shortread |= fread(ptr+4, 1, w-4, f) < w-4;
|
||||
detect->valid = 0;
|
||||
}
|
||||
else
|
||||
shortread |= fread(ptr, 1, w, f) < w;
|
||||
|
||||
ptr += img->stride[plane];
|
||||
}
|
||||
@@ -348,12 +338,12 @@ unsigned int file_is_ivf(FILE *infile,
|
||||
unsigned int *fourcc,
|
||||
unsigned int *width,
|
||||
unsigned int *height,
|
||||
struct detect_buffer *detect)
|
||||
char detect[4])
|
||||
{
|
||||
char raw_hdr[IVF_FILE_HDR_SZ];
|
||||
int is_ivf = 0;
|
||||
|
||||
if(memcmp(detect->buf, "DKIF", 4) != 0)
|
||||
if(memcmp(detect, "DKIF", 4) != 0)
|
||||
return 0;
|
||||
|
||||
/* See write_ivf_file_header() for more documentation on the file header
|
||||
@@ -377,7 +367,6 @@ unsigned int file_is_ivf(FILE *infile,
|
||||
{
|
||||
*width = mem_get_le16(raw_hdr + 12);
|
||||
*height = mem_get_le16(raw_hdr + 14);
|
||||
detect->position = 4;
|
||||
}
|
||||
|
||||
return is_ivf;
|
||||
@@ -445,7 +434,7 @@ struct EbmlGlobal
|
||||
int debug;
|
||||
|
||||
FILE *stream;
|
||||
int64_t last_pts_ms;
|
||||
uint64_t last_pts_ms;
|
||||
vpx_rational_t framerate;
|
||||
|
||||
/* These pointers are to the start of an element */
|
||||
@@ -658,7 +647,7 @@ write_webm_block(EbmlGlobal *glob,
|
||||
unsigned char track_number;
|
||||
unsigned short block_timecode = 0;
|
||||
unsigned char flags;
|
||||
int64_t pts_ms;
|
||||
uint64_t pts_ms;
|
||||
int start_cluster = 0, is_keyframe;
|
||||
|
||||
/* Calculate the PTS of this frame in milliseconds */
|
||||
@@ -989,32 +978,23 @@ static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1,
|
||||
static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1,
|
||||
"Enable automatic alt reference frames");
|
||||
static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1,
|
||||
"AltRef Max Frames");
|
||||
"alt_ref Max Frames");
|
||||
static const arg_def_t arnr_strength = ARG_DEF(NULL, "arnr-strength", 1,
|
||||
"AltRef Strength");
|
||||
"alt_ref Strength");
|
||||
static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1,
|
||||
"AltRef Type");
|
||||
static const struct arg_enum_list tuning_enum[] = {
|
||||
{"psnr", VP8_TUNE_PSNR},
|
||||
{"ssim", VP8_TUNE_SSIM},
|
||||
{NULL, 0}
|
||||
};
|
||||
static const arg_def_t tune_ssim = ARG_DEF_ENUM(NULL, "tune", 1,
|
||||
"Material to favor", tuning_enum);
|
||||
"alt_ref Type");
|
||||
|
||||
static const arg_def_t *vp8_args[] =
|
||||
{
|
||||
&cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
|
||||
&token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
|
||||
&tune_ssim, NULL
|
||||
&token_parts, &arnr_maxframes, &arnr_strength, &arnr_type, NULL
|
||||
};
|
||||
static const int vp8_arg_ctrl_map[] =
|
||||
{
|
||||
VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
|
||||
VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
|
||||
VP8E_SET_TOKEN_PARTITIONS,
|
||||
VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE,
|
||||
VP8E_SET_TUNING, 0
|
||||
VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE, 0
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -1040,7 +1020,6 @@ static void usage_exit()
|
||||
#if CONFIG_VP8_ENCODER
|
||||
fprintf(stderr, "\nVP8 Specific Options:\n");
|
||||
arg_show_usage(stdout, vp8_args);
|
||||
xxx_show_usage(stdout);
|
||||
#endif
|
||||
fprintf(stderr, "\n"
|
||||
"Included encoders:\n"
|
||||
@@ -1094,7 +1073,6 @@ int main(int argc, const char **argv_)
|
||||
int psnr_count = 0;
|
||||
|
||||
exec_name = argv_[0];
|
||||
ebml.last_pts_ms = -1;
|
||||
|
||||
if (argc < 3)
|
||||
usage_exit();
|
||||
@@ -1175,7 +1153,6 @@ int main(int argc, const char **argv_)
|
||||
out_fn = arg.val;
|
||||
else if (arg_match(&arg, &debugmode, argi))
|
||||
ebml.debug = 1;
|
||||
else if (xxx_parse_arg(argi));
|
||||
else
|
||||
argj++;
|
||||
}
|
||||
@@ -1212,12 +1189,6 @@ int main(int argc, const char **argv_)
|
||||
*/
|
||||
cfg.g_timebase.den = 1000;
|
||||
|
||||
/* Never use the library's default resolution, require it be parsed
|
||||
* from the file or set on the command line.
|
||||
*/
|
||||
cfg.g_w = 0;
|
||||
cfg.g_h = 0;
|
||||
|
||||
/* Now parse the remainder of the parameters. */
|
||||
for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
|
||||
{
|
||||
@@ -1329,7 +1300,7 @@ int main(int argc, const char **argv_)
|
||||
if (arg_ctrl_cnt < ARG_CTRL_CNT_MAX)
|
||||
{
|
||||
arg_ctrls[arg_ctrl_cnt][0] = ctrl_args_map[i];
|
||||
arg_ctrls[arg_ctrl_cnt][1] = arg_parse_enum_or_int(&arg);
|
||||
arg_ctrls[arg_ctrl_cnt][1] = arg_parse_int(&arg);
|
||||
arg_ctrl_cnt++;
|
||||
}
|
||||
}
|
||||
@@ -1359,11 +1330,11 @@ int main(int argc, const char **argv_)
|
||||
{
|
||||
int frames_in = 0, frames_out = 0;
|
||||
unsigned long nbytes = 0;
|
||||
size_t detect_bytes;
|
||||
struct detect_buffer detect;
|
||||
|
||||
/* Parse certain options from the input file, if possible */
|
||||
infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb")
|
||||
: set_binary_mode(stdin);
|
||||
infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb") : stdin;
|
||||
|
||||
if (!infile)
|
||||
{
|
||||
@@ -1373,11 +1344,13 @@ int main(int argc, const char **argv_)
|
||||
|
||||
/* For RAW input sources, these bytes will applied on the first frame
|
||||
* in read_frame().
|
||||
* We can always read 4 bytes because the minimum supported frame size
|
||||
* is 2x2.
|
||||
*/
|
||||
detect.buf_read = fread(detect.buf, 1, 4, infile);
|
||||
detect.position = 0;
|
||||
detect_bytes = fread(detect.buf, 1, 4, infile);
|
||||
detect.valid = 0;
|
||||
|
||||
if (detect.buf_read == 4 && file_is_y4m(infile, &y4m, detect.buf))
|
||||
if (detect_bytes == 4 && file_is_y4m(infile, &y4m, detect.buf))
|
||||
{
|
||||
if (y4m_input_open(&y4m, infile, detect.buf, 4) >= 0)
|
||||
{
|
||||
@@ -1402,8 +1375,8 @@ int main(int argc, const char **argv_)
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
else if (detect.buf_read == 4 &&
|
||||
file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, &detect))
|
||||
else if (detect_bytes == 4 &&
|
||||
file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, detect.buf))
|
||||
{
|
||||
file_type = FILE_TYPE_IVF;
|
||||
switch (fourcc)
|
||||
@@ -1422,15 +1395,8 @@ int main(int argc, const char **argv_)
|
||||
else
|
||||
{
|
||||
file_type = FILE_TYPE_RAW;
|
||||
detect.valid = 1;
|
||||
}
|
||||
|
||||
if(!cfg.g_w || !cfg.g_h)
|
||||
{
|
||||
fprintf(stderr, "Specify stream dimensions with --width (-w) "
|
||||
" and --height (-h).\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
#define SHOW(field) fprintf(stderr, " %-28s = %d\n", #field, cfg.field)
|
||||
|
||||
if (verbose && pass == 0)
|
||||
@@ -1483,8 +1449,7 @@ int main(int argc, const char **argv_)
|
||||
cfg.g_w, cfg.g_h, 1);
|
||||
}
|
||||
|
||||
outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb")
|
||||
: set_binary_mode(stdout);
|
||||
outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb") : stdout;
|
||||
|
||||
if (!outfile)
|
||||
{
|
||||
@@ -1562,7 +1527,7 @@ int main(int argc, const char **argv_)
|
||||
vpx_codec_iter_t iter = NULL;
|
||||
const vpx_codec_cx_pkt_t *pkt;
|
||||
struct vpx_usec_timer timer;
|
||||
int64_t frame_start, next_frame_start;
|
||||
int64_t frame_start;
|
||||
|
||||
if (!arg_limit || frames_in < arg_limit)
|
||||
{
|
||||
@@ -1583,11 +1548,9 @@ int main(int argc, const char **argv_)
|
||||
|
||||
frame_start = (cfg.g_timebase.den * (int64_t)(frames_in - 1)
|
||||
* arg_framerate.den) / cfg.g_timebase.num / arg_framerate.num;
|
||||
next_frame_start = (cfg.g_timebase.den * (int64_t)(frames_in)
|
||||
* arg_framerate.den)
|
||||
/ cfg.g_timebase.num / arg_framerate.num;
|
||||
vpx_codec_encode(&encoder, frame_avail ? &raw : NULL, frame_start,
|
||||
next_frame_start - frame_start,
|
||||
cfg.g_timebase.den * arg_framerate.den
|
||||
/ cfg.g_timebase.num / arg_framerate.num,
|
||||
0, arg_deadline);
|
||||
vpx_usec_timer_mark(&timer);
|
||||
cx_time += vpx_usec_timer_elapsed(&timer);
|
||||
@@ -1695,7 +1658,7 @@ int main(int argc, const char **argv_)
|
||||
}
|
||||
|
||||
fclose(outfile);
|
||||
stats_close(&stats, arg_passes-1);
|
||||
stats_close(&stats);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
if (one_pass_only)
|
||||
|
||||
Reference in New Issue
Block a user