Merge remote branch 'origin/master' into experimental

Conflicts:
	vp8/encoder/encodeframe.c
	vp8/encoder/rdopt.c

Change-Id: I8bab720889ac652361abdedfe2cc91a89742cb30
Author: Johann
Date:   2011-06-17 14:56:27 -04:00
Commit: e18d7bc230

44 changed files with 840 additions and 1278 deletions


@@ -124,6 +124,12 @@ $(BUILD_PFX)%.s.o: %.s
$(if $(quiet),@echo " [AS] $@")
$(qexec)$(AS) $(ASFLAGS) -o $@ $<
.PRECIOUS: %.c.S
%.c.S: CFLAGS += -DINLINE_ASM
$(BUILD_PFX)%.c.S: %.c
$(if $(quiet),@echo " [GEN] $@")
$(qexec)$(CC) -S $(CFLAGS) -o $@ $<
.PRECIOUS: %.asm.s
$(BUILD_PFX)%.asm.s: %.asm
$(if $(quiet),@echo " [ASM CONVERSION] $@")


@@ -412,11 +412,14 @@ EOF
write_common_target_config_h() {
cat > ${TMP_H} << EOF
/* This file automatically generated by configure. Do not edit! */
#ifndef VPX_CONFIG_H
#define VPX_CONFIG_H
#define RESTRICT ${RESTRICT}
EOF
print_config_h ARCH "${TMP_H}" ${ARCH_LIST}
print_config_h HAVE "${TMP_H}" ${HAVE_LIST}
print_config_h CONFIG "${TMP_H}" ${CONFIG_LIST}
echo "#endif /* VPX_CONFIG_H */" >> ${TMP_H}
mkdir -p `dirname "$1"`
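# Replace the header only when its content actually changed, so an
# unchanged configuration does not bump the timestamp and force rebuilds.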
cmp "$1" ${TMP_H} >/dev/null 2>&1 || mv ${TMP_H} "$1"
}
@@ -689,7 +692,7 @@ process_common_toolchain() {
if enabled armv7
then
check_add_cflags --cpu=Cortex-A8 --fpu=softvfp+vfpv3
check_add_asflags --cpu=Cortex-A8 --fpu=none
check_add_asflags --cpu=Cortex-A8 --fpu=softvfp+vfpv3
else
check_add_cflags --cpu=${tgt_isa##armv}
check_add_asflags --cpu=${tgt_isa##armv}
@@ -751,41 +754,24 @@ process_common_toolchain() {
linux*)
enable linux
if enabled rvct; then
# Compiling with RVCT requires an alternate libc (glibc) when
# targeting linux.
disabled builtin_libc \
|| die "Must supply --libc when targeting *-linux-rvct"
# Check if we have CodeSourcery GCC in PATH. Needed for
# libraries
hash arm-none-linux-gnueabi-gcc 2>&- || \
die "Couldn't find CodeSourcery GCC from PATH"
# Set up compiler
add_cflags --library_interface=aeabi_glibc
add_cflags --no_hide_all
add_cflags --dwarf2
# Use armcc as a linker to enable translation of
# some gcc specific options such as -lm and -lpthread.
LD="armcc --translate_gcc"
# Set up linker
add_ldflags --sysv --no_startup --no_ref_cpp_init
add_ldflags --entry=_start
add_ldflags --keep '"*(.init)"' --keep '"*(.fini)"'
add_ldflags --keep '"*(.init_array)"' --keep '"*(.fini_array)"'
add_ldflags --dynamiclinker=/lib/ld-linux.so.3
add_extralibs libc.so.6 -lc_nonshared crt1.o crti.o crtn.o
# create configuration file (uses path to CodeSourcery GCC)
armcc --arm_linux_configure --arm_linux_config_file=arm_linux.cfg
# Add the paths for the alternate libc
for d in usr/include; do
try_dir="${alt_libc}/${d}"
[ -d "${try_dir}" ] && add_cflags -J"${try_dir}"
done
add_cflags -J"${RVCT31INC}"
for d in lib usr/lib; do
try_dir="${alt_libc}/${d}"
[ -d "${try_dir}" ] && add_ldflags -L"${try_dir}"
done
# glibc has some struct members named __align, which is a
# storage modifier in RVCT. If we need to use this modifier,
# we'll have to #undef it in our code. Note that this must
# happen AFTER all libc includes.
add_cflags -D__align=x_align_x
add_cflags --arm_linux_paths --arm_linux_config_file=arm_linux.cfg
add_asflags --no_hide_all --apcs=/interwork
add_ldflags --arm_linux_paths --arm_linux_config_file=arm_linux.cfg
enabled pic && add_cflags --apcs=/fpic
enabled pic && add_asflags --apcs=/fpic
enabled shared && add_cflags --shared
fi
;;
@@ -953,9 +939,13 @@ process_common_toolchain() {
enabled gcov &&
check_add_cflags -fprofile-arcs -ftest-coverage &&
check_add_ldflags -fprofile-arcs -ftest-coverage
if enabled optimizations; then
enabled rvct && check_add_cflags -Otime
enabled small && check_add_cflags -O2 || check_add_cflags -O3
if enabled rvct; then
enabled small && check_add_cflags -Ospace || check_add_cflags -Otime
else
enabled small && check_add_cflags -O2 || check_add_cflags -O3
fi
fi
# Position Independent Code (PIC) support, for building relocatable


@@ -365,7 +365,7 @@ generate_vcproj() {
DebugInformationFormat="1" \
Detect64BitPortabilityProblems="true" \
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="1"
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true"
;;
*)
tag Tool \
@@ -379,7 +379,7 @@ generate_vcproj() {
DebugInformationFormat="1" \
Detect64BitPortabilityProblems="true" \
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="1"
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true"
;;
esac
;;

libs.mk

@@ -184,7 +184,7 @@ BUILD_LIBVPX_SO := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
LIBVPX_SO := libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)
$(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) libvpx.ver
$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm -pthread
$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR)
$(BUILD_PFX)$(LIBVPX_SO): SO_VERSION_SCRIPT = libvpx.ver
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
@@ -257,36 +257,44 @@ $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
#
# Calculate platform- and compiler-specific offsets for hand coded assembly
#
ifeq ($(CONFIG_EXTERNAL_BUILD),) # Visual Studio uses obj_int_extract.bat
ifeq ($(ARCH_ARM), yes)
ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.S
grep EQU $< | tr -d '$$\#' $(ADS2GAS) > $@
$(VP8_PREFIX)common/asm_com_offsets.c.S: vp8/common/asm_com_offsets.c
CLEAN-OBJS += asm_com_offsets.asm $(VP8_PREFIX)common/asm_com_offsets.c.S
asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.S
grep EQU $< | tr -d '$$\#' $(ADS2GAS) > $@
$(VP8_PREFIX)encoder/asm_enc_offsets.c.S: vp8/encoder/asm_enc_offsets.c
CLEAN-OBJS += asm_enc_offsets.asm $(VP8_PREFIX)encoder/asm_enc_offsets.c.S
asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.S
grep EQU $< | tr -d '$$\#' $(ADS2GAS) > $@
$(VP8_PREFIX)decoder/asm_dec_offsets.c.S: vp8/decoder/asm_dec_offsets.c
CLEAN-OBJS += asm_dec_offsets.asm $(VP8_PREFIX)decoder/asm_dec_offsets.c.S
else
ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
asm_com_offsets.asm: obj_int_extract
asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o
./obj_int_extract rvds $< $(ADS2GAS) > $@
OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o
CLEAN-OBJS += asm_com_offsets.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm
endif
ifeq ($(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64), yes)
ifeq ($(CONFIG_VP8_ENCODER), yes)
asm_enc_offsets.asm: obj_int_extract
asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
asm_enc_offsets.asm: obj_int_extract
asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
./obj_int_extract rvds $< $(ADS2GAS) > $@
OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
CLEAN-OBJS += asm_enc_offsets.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm
endif
endif
OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
CLEAN-OBJS += asm_enc_offsets.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm
ifeq ($(ARCH_ARM), yes)
ifeq ($(CONFIG_VP8_DECODER), yes)
asm_dec_offsets.asm: obj_int_extract
asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
asm_dec_offsets.asm: obj_int_extract
asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
./obj_int_extract rvds $< $(ADS2GAS) > $@
OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
CLEAN-OBJS += asm_dec_offsets.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm
endif
OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
CLEAN-OBJS += asm_dec_offsets.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm
endif
endif
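
The offset rules above use the classic asm-offsets technique: a small C file of
DEFINE()d offsetof() constants is compiled to assembly (or to an object fed to
obj_int_extract for RVCT), and the constants are grepped back out as EQU lines
for the hand-written assembly to include. A minimal sketch of the idea for the
gcc/icc path, with hypothetical names in place of the project's per-toolchain
plumbing:

#include <stddef.h>

typedef struct { int y_width; int y_height; unsigned char *y_buffer; } demo_buf;

/* Compiled with cc -S this emits lines such as "demo_buf_y_height EQU $4";
 * the $ (x86) or # (ARM) immediate prefix is what the tr -d '$$\#' in the
 * rules above strips before the grep'd EQU lines are reused. */
#define DEFINE(sym, val) \
    __asm__ volatile("\n" #sym " EQU %0" : : "i"(val))

void demo_offsets(void)
{
    DEFINE(demo_buf_y_height, offsetof(demo_buf, y_height));
    DEFINE(demo_buf_y_buffer, offsetof(demo_buf, y_buffer));
}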


@@ -9,27 +9,12 @@
*/
#include "vpx_ports/config.h"
#include <stddef.h>
#include "vpx_ports/asm_offsets.h"
#include "vpx_scale/yv12config.h"
#define ct_assert(name,cond) \
static void assert_##name(void) UNUSED;\
static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
BEGIN
#define DEFINE(sym, val) int sym = val;
/*
#define BLANK() asm volatile("\n->" : : )
*/
/*
* int main(void)
* {
*/
//vpx_scale
/* vpx_scale */
DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
@@ -41,9 +26,7 @@ DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_b
DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
//add asserts for any offset that is not supported by assembly code
//add asserts for any size that is not supported by assembly code
/*
* return 0;
* }
*/
END
/* add asserts for any offset that is not supported by assembly code */
/* add asserts for any size that is not supported by assembly code */
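
The ct_assert() macro retained above is a C89 compile-time assertion: if the
condition is false, !!(cond) evaluates to 0 and the switch acquires two
identical "case 0:" labels, a hard compile error. A minimal sketch of the same
trick with hypothetical names (the real macro also marks the helper UNUSED to
silence unused-function warnings):

#define CT_ASSERT(name, cond) \
    static void ct_assert_##name(void) { switch (0) { case 0: case !!(cond):; } }

typedef struct { short a; short b; int c; } demo_token;

CT_ASSERT(demo_token_is_8_bytes, sizeof(demo_token) == 8)   /* compiles */
/* CT_ASSERT(demo_token_is_4_bytes, sizeof(demo_token) == 4)   duplicate case 0: error */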


@@ -252,6 +252,9 @@ typedef struct
int mb_to_top_edge;
int mb_to_bottom_edge;
int ref_frame_cost[MAX_REF_FRAMES];
unsigned int frames_since_golden;
unsigned int frames_till_alt_ref_frame;
vp8_subpix_fn_t subpixel_predict;


@@ -123,7 +123,21 @@ static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
{
/* On L edge, get from MB to left of us */
--cur_mb;
b += 4;
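/* Neighbour was not coded as B_PRED: its bmi[] is no longer kept in sync
 * (this commit drops the encoder/decoder back-fill of per-block modes), so
 * map the neighbour's 16x16 intra mode to the equivalent 4x4 mode instead. */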
switch (cur_mb->mbmi.mode)
{
case B_PRED:
return (cur_mb->bmi + b + 3)->as_mode;
case DC_PRED:
return B_DC_PRED;
case V_PRED:
return B_VE_PRED;
case H_PRED:
return B_HE_PRED;
case TM_PRED:
return B_TM_PRED;
default:
return B_DC_PRED;
}
}
return (cur_mb->bmi + b - 1)->as_mode;
@@ -135,7 +149,22 @@ static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi
{
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
b += 16;
switch (cur_mb->mbmi.mode)
{
case B_PRED:
return (cur_mb->bmi + b + 12)->as_mode;
case DC_PRED:
return B_DC_PRED;
case V_PRED:
return B_VE_PRED;
case H_PRED:
return B_HE_PRED;
case TM_PRED:
return B_TM_PRED;
default:
return B_DC_PRED;
}
}
return (cur_mb->bmi + b - 4)->as_mode;


@@ -9,21 +9,10 @@
*/
#include "vpx_ports/config.h"
#include <stddef.h>
#include "vpx_ports/asm_offsets.h"
#include "onyxd_int.h"
#define DEFINE(sym, val) int sym = val;
/*
#define BLANK() asm volatile("\n->" : : )
*/
/*
* int main(void)
* {
*/
BEGIN
DEFINE(detok_scan, offsetof(DETOK, scan));
DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove));
@@ -49,9 +38,7 @@ DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length));
//add asserts for any offset that is not supported by assembly code
//add asserts for any size that is not supported by assembly code
/*
* return 0;
* }
*/
END
/* add asserts for any offset that is not supported by assembly code */
/* add asserts for any size that is not supported by assembly code */


@@ -101,36 +101,6 @@ static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_co
}
while (++i < 16);
}
else
{
int BMode;
int i = 0;
switch (y_mode)
{
case DC_PRED:
BMode = B_DC_PRED;
break;
case V_PRED:
BMode = B_VE_PRED;
break;
case H_PRED:
BMode = B_HE_PRED;
break;
case TM_PRED:
BMode = B_TM_PRED;
break;
default:
BMode = B_DC_PRED;
break;
}
do
{
m->bmi[i].as_mode = (B_PREDICTION_MODE)BMode;
}
while (++i < 16);
}
m->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.kf_uv_mode_prob);
}


@@ -533,7 +533,10 @@ static void stop_token_decoder(VP8D_COMP *pbi)
VP8_COMMON *pc = &pbi->common;
if (pc->multi_token_partition != ONE_PARTITION)
{
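/* Free here and clear the pointer so that the unconditional
 * vpx_free(pbi->mbc) in vp8dx_remove_decompressor() stays a safe no-op. */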
vpx_free(pbi->mbc);
pbi->mbc = NULL;
}
}
static void init_frame(VP8D_COMP *pbi)


@@ -129,6 +129,7 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr)
vp8_de_alloc_overlap_lists(pbi);
#endif
vp8_remove_common(&pbi->common);
vpx_free(pbi->mbc);
vpx_free(pbi);
}


@@ -135,7 +135,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
mb_init_dequantizer(pbi, xd);
/* do prediction */
if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
@@ -181,7 +181,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
xd->predictor, xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs, xd->block[24].diff);
}
else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
else if (xd->mode_info_context->mbmi.mode == B_PRED)
{
for (i = 0; i < 16; i++)
{
@@ -334,7 +334,7 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
{
MODE_INFO *next = xd->mode_info_context +1;
if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
if (next->mbmi.ref_frame == INTRA_FRAME)
{
for (i = 0; i < 16; i++)
pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
@@ -824,7 +824,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
{
MODE_INFO *next = xd->mode_info_context +1;
if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
if (next->mbmi.ref_frame == INTRA_FRAME)
{
for (i = 0; i < 16; i++)
pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];


@@ -38,27 +38,4 @@ static int vp8_treed_read(
return -i;
}
/* Variant reads a binary number given distributions on each bit.
Note that tree is arbitrary; probability of decoding a zero
may or may not depend on previously decoded bits. */
static int vp8_treed_read_num(
vp8_reader *const r, /* !!! must return a 0 or 1 !!! */
vp8_tree t,
const vp8_prob *const p
)
{
vp8_tree_index i = 0;
int v = 0, b;
do
{
b = vp8_read(r, p[i>>1]);
v = (v << 1) + b;
}
while ((i = t[i+b]) > 0);
return v;
}
#endif /* tree_reader_h */


@@ -17,7 +17,7 @@
#if HAVE_MMX
void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
static void dequantize_b_mmx(BLOCKD *d)
void vp8_dequantize_b_mmx(BLOCKD *d)
{
short *sq = (short *) d->qcoeff;
short *dq = (short *) d->dqcoeff;
@@ -28,6 +28,7 @@ static void dequantize_b_mmx(BLOCKD *d)
void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
{
#if CONFIG_RUNTIME_CPU_DETECT
int flags = x86_simd_caps();
/* Note:
@@ -36,12 +37,11 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
* you modify any of the function mappings present in this file, be sure
* to also update them in static mappings (<arch>/filename_<arch>.h)
*/
#if CONFIG_RUNTIME_CPU_DETECT
/* Override default functions with fastest ones for this CPU. */
#if HAVE_MMX
if (flags & HAS_MMX)
{
pbi->dequant.block = dequantize_b_mmx;
pbi->dequant.block = vp8_dequantize_b_mmx;
pbi->dequant.idct_add = vp8_dequant_idct_add_mmx;
pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_mmx;
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_mmx;


@@ -53,10 +53,7 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_armv6;
/*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
/*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;;
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/
/*cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/
/*cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;*/
@@ -103,9 +100,6 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon;
/*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon;
/*cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;*/
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon;
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon;


@@ -10,7 +10,6 @@
EXPORT |vp8_mse16x16_neon|
EXPORT |vp8_get16x16pred_error_neon|
EXPORT |vp8_get4x4sse_cs_neon|
ARM
@@ -76,62 +75,6 @@ mse16x16_neon_loop
ENDP
;============================
; r0 unsigned char *src_ptr
; r1 int src_stride
; r2 unsigned char *ref_ptr
; r3 int ref_stride
|vp8_get16x16pred_error_neon| PROC
vmov.i8 q8, #0 ;q8 - sum
vmov.i8 q9, #0 ;q9, q10 - pred_error
vmov.i8 q10, #0
mov r12, #8
get16x16pred_error_neon_loop
vld1.8 {q0}, [r0], r1 ;Load up source and reference
vld1.8 {q2}, [r2], r3
vld1.8 {q1}, [r0], r1
vld1.8 {q3}, [r2], r3
vsubl.u8 q11, d0, d4
vsubl.u8 q12, d1, d5
vsubl.u8 q13, d2, d6
vsubl.u8 q14, d3, d7
vpadal.s16 q8, q11
vmlal.s16 q9, d22, d22
vmlal.s16 q10, d23, d23
subs r12, r12, #1
vpadal.s16 q8, q12
vmlal.s16 q9, d24, d24
vmlal.s16 q10, d25, d25
vpadal.s16 q8, q13
vmlal.s16 q9, d26, d26
vmlal.s16 q10, d27, d27
vpadal.s16 q8, q14
vmlal.s16 q9, d28, d28
vmlal.s16 q10, d29, d29
bne get16x16pred_error_neon_loop
vadd.u32 q10, q9, q10
vpaddl.s32 q0, q8
vpaddl.u32 q1, q10
vadd.s64 d0, d0, d1
vadd.u64 d1, d2, d3
vmull.s32 q5, d0, d0
vshr.s32 d10, d10, #8
vsub.s32 d0, d1, d10
vmov.32 r0, d0[0]
bx lr
ENDP
;=============================
; r0 unsigned char *src_ptr,


@@ -83,9 +83,6 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);
//extern prototype_getmbss(vp8_get_mb_ss_c);
extern prototype_variance(vp8_mse16x16_neon);
extern prototype_get16x16prederror(vp8_get16x16pred_error_neon);
//extern prototype_variance2(vp8_get8x8var_c);
//extern prototype_variance2(vp8_get16x16var_c);
extern prototype_get16x16prederror(vp8_get4x4sse_cs_neon);
#if !CONFIG_RUNTIME_CPU_DETECT
@@ -149,15 +146,6 @@ extern prototype_get16x16prederror(vp8_get4x4sse_cs_neon);
#undef vp8_variance_mse16x16
#define vp8_variance_mse16x16 vp8_mse16x16_neon
#undef vp8_variance_get16x16prederror
#define vp8_variance_get16x16prederror vp8_get16x16pred_error_neon
//#undef vp8_variance_get8x8var
//#define vp8_variance_get8x8var vp8_get8x8var_c
//#undef vp8_variance_get16x16var
//#define vp8_variance_get16x16var vp8_get16x16var_c
#undef vp8_variance_get4x4sse_cs
#define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon
#endif


@@ -9,31 +9,17 @@
*/
#include "vpx_ports/config.h"
#include <stddef.h>
#include "vpx_ports/asm_offsets.h"
#include "vpx_config.h"
#include "block.h"
#include "vp8/common/blockd.h"
#include "onyx_int.h"
#include "treewriter.h"
#include "tokenize.h"
#define ct_assert(name,cond) \
static void assert_##name(void) UNUSED;\
static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
BEGIN
#define DEFINE(sym, val) int sym = val;
/*
#define BLANK() asm volatile("\n->" : : )
*/
/*
* int main(void)
* {
*/
//regular quantize
/* regular quantize */
DEFINE(vp8_block_coeff, offsetof(BLOCK, coeff));
DEFINE(vp8_block_zbin, offsetof(BLOCK, zbin));
DEFINE(vp8_block_round, offsetof(BLOCK, round));
@@ -48,7 +34,7 @@ DEFINE(vp8_blockd_dequant, offsetof(BLOCKD, dequant));
DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
DEFINE(vp8_blockd_eob, offsetof(BLOCKD, eob));
// subtract
/* subtract */
DEFINE(vp8_block_base_src, offsetof(BLOCK, base_src));
DEFINE(vp8_block_src, offsetof(BLOCK, src));
DEFINE(vp8_block_src_diff, offsetof(BLOCK, src_diff));
@@ -56,7 +42,7 @@ DEFINE(vp8_block_src_stride, offsetof(BLOCK, src_stride));
DEFINE(vp8_blockd_predictor, offsetof(BLOCKD, predictor));
//pack tokens
/* pack tokens */
DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue));
DEFINE(vp8_writer_range, offsetof(vp8_writer, range));
DEFINE(vp8_writer_value, offsetof(vp8_writer, value));
@@ -90,16 +76,16 @@ DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST));
DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
// These two sizes are used in vp8cx_pack_tokens. They are hard coded
// so if the size changes this will have to be adjusted.
END
/* add asserts for any offset that is not supported by assembly code
* add asserts for any size that is not supported by assembly code
* These are used in vp8cx_pack_tokens. They are hard coded so if their sizes
* change they will have to be adjusted.
*/
#if HAVE_ARMV5TE
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)
#endif
//add asserts for any offset that is not supported by assembly code
//add asserts for any size that is not supported by assembly code
/*
* return 0;
* }
*/


@@ -776,9 +776,9 @@ static void write_mv_ref
vp8_writer *w, MB_PREDICTION_MODE m, const vp8_prob *p
)
{
#if CONFIG_DEBUG
assert(NEARESTMV <= m && m <= SPLITMV);
#endif
vp8_write_token(w, vp8_mv_ref_tree, p,
vp8_mv_ref_encoding_array - NEARESTMV + m);
}
@@ -788,8 +788,9 @@ static void write_sub_mv_ref
vp8_writer *w, B_PREDICTION_MODE m, const vp8_prob *p
)
{
#if CONFIG_DEBUG
assert(LEFT4X4 <= m && m <= NEW4X4);
#endif
vp8_write_token(w, vp8_sub_mv_ref_tree, p,
vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
}
@@ -1017,11 +1018,13 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
blockmode = cpi->mb.partition_info->bmi[j].mode;
blockmv = cpi->mb.partition_info->bmi[j].mv;
#if CONFIG_DEBUG
while (j != L[++k])
if (k >= 16)
assert(0);
#else
while (j != L[++k]);
#endif
leftmv.as_int = left_block_mv(m, k);
abovemv.as_int = above_block_mv(m, k, mis);
mv_contz = vp8_mv_cont(&leftmv, &abovemv);


@@ -50,6 +50,7 @@ void vp8_build_block_offsets(MACROBLOCK *x);
void vp8_setup_block_ptrs(MACROBLOCK *x);
int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x );
#ifdef MODE_STATS
unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
@@ -84,8 +85,6 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
{
unsigned int act;
unsigned int sse;
int sum;
/* TODO: This could also be done over smaller areas (8x8), but that would
* require extensive changes elsewhere, as lambda is assumed to be fixed
* over an entire MB in most of the code.
@@ -93,14 +92,9 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
* lambda using a non-linear combination (e.g., the smallest, or second
* smallest, etc.).
*/
VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
/* This requires a full 32 bits of precision. */
act = (sse<<8) - sum*sum;
/* Drop 4 to give us some headroom to work with. */
act = (act + 8) >> 4;
act = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)(x->src.y_buffer,
x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
act = act<<4;
/* If the region is flat, lower the activity some more. */
if (act < 8<<12)
@@ -110,70 +104,121 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
}
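/* Note (illustrative, not part of the change): the removed path computed
 * act = (sse << 8) - sum*sum, i.e. 256 * sum((x - mean)^2) over the 256
 * luma samples; the (sse << 8) intermediate can reach 255^2 * 256 * 256,
 * about 4.26e9, which only just fits in 32 unsigned bits, hence the "full
 * 32 bits of precision" comment. After (act + 8) >> 4 it equalled
 * 16 * sum((x - mean)^2); the replacement reaches the same scale directly
 * via var16x16 (which returns sse - sum*sum/256) followed by act <<= 4,
 * avoiding the overflow-critical intermediate. */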
// Stub for alternative experimental activity measures.
static unsigned int alt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
static unsigned int alt_activity_measure( VP8_COMP *cpi,
MACROBLOCK *x, int use_dc_pred )
{
unsigned int mb_activity = VP8_ACTIVITY_AVG_MIN;
x->e_mbd.mode_info_context->mbmi.mode = DC_PRED;
x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
mb_activity = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff);
return mb_activity;
return vp8_encode_intra(cpi,x, use_dc_pred);
}
// Measure the activity of the current macroblock
// What we measure here is TBD so abstracted to this function
static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
#define ALT_ACT_MEASURE 1
static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x,
int mb_row, int mb_col)
{
unsigned int mb_activity;
if ( 1 )
if ( ALT_ACT_MEASURE )
{
int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
// Or use an alternative.
mb_activity = alt_activity_measure( cpi, x, use_dc_pred );
}
else
{
// Original activity measure from Tim T's code.
mb_activity = tt_activity_measure( cpi, x );
}
else
{
// Or use an alternative.
mb_activity = alt_activity_measure( cpi, x );
}
if ( mb_activity < VP8_ACTIVITY_AVG_MIN )
mb_activity = VP8_ACTIVITY_AVG_MIN;
return mb_activity;
}
// Calculate an "average" mb activity value for the frame
#define ACT_MEDIAN 0
static void calc_av_activity( VP8_COMP *cpi, INT64 activity_sum )
{
#if ACT_MEDIAN
// Find median: Simple n^2 algorithm for experimentation
{
unsigned int median;
unsigned int i,j;
unsigned int * sortlist;
unsigned int tmp;
// Create a list to sort to
CHECK_MEM_ERROR(sortlist,
vpx_calloc(sizeof(unsigned int),
cpi->common.MBs));
// Copy map to sort list
vpx_memcpy( sortlist, cpi->mb_activity_map,
sizeof(unsigned int) * cpi->common.MBs );
// Ripple each value down to its correct position
for ( i = 1; i < cpi->common.MBs; i ++ )
{
for ( j = i; j > 0; j -- )
{
if ( sortlist[j] < sortlist[j-1] )
{
// Swap values
tmp = sortlist[j-1];
sortlist[j-1] = sortlist[j];
sortlist[j] = tmp;
}
else
break;
}
}
// Even number of MBs, so estimate the median as the mean of the two either side.
median = ( 1 + sortlist[cpi->common.MBs >> 1] +
sortlist[(cpi->common.MBs >> 1) + 1] ) >> 1;
cpi->activity_avg = median;
vpx_free(sortlist);
}
#else
// Simple mean for now
cpi->activity_avg = (unsigned int)(activity_sum/cpi->common.MBs);
#endif
if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
// Experimental code: return fixed value normalized for several clips
if ( ALT_ACT_MEASURE )
cpi->activity_avg = 100000;
}
#define USE_ACT_INDEX 0
#define OUTPUT_NORM_ACT_STATS 0
// Calculate a normalized activity value for each mb
static void calc_norm_activity( VP8_COMP *cpi, MACROBLOCK *x )
#if USE_ACT_INDEX
// Calculate an activity index for each mb
static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x )
{
VP8_COMMON *const cm = & cpi->common;
int mb_row, mb_col;
unsigned int act;
unsigned int a;
unsigned int b;
INT64 act;
INT64 a;
INT64 b;
#if OUTPUT_NORM_ACT_STATS
FILE *f = fopen("norm_act.stt", "a");
fprintf(f, "\n");
fprintf(f, "\n%12d\n", cpi->activity_avg );
#endif
// Reset pointers to start of activity map
x->mb_activity_ptr = cpi->mb_activity_map;
x->mb_norm_activity_ptr = cpi->mb_norm_activity_map;
// Calculate normalized mb activity number.
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
@@ -185,25 +230,19 @@ static void calc_norm_activity( VP8_COMP *cpi, MACROBLOCK *x )
act = *(x->mb_activity_ptr);
// Calculate a normalized activity number
a = act + 2*cpi->activity_avg;
b = 2*act + cpi->activity_avg;
a = act + 4*cpi->activity_avg;
b = 4*act + cpi->activity_avg;
if ( b >= a )
*(x->mb_norm_activity_ptr) = (int)((b + (a>>1))/a);
*(x->activity_ptr) = (int)((b + (a>>1))/a) - 1;
else
*(x->mb_norm_activity_ptr) = -(int)((a + (b>>1))/b);
if ( *(x->mb_norm_activity_ptr) == 0 )
{
*(x->mb_norm_activity_ptr) = 1;
}
*(x->activity_ptr) = 1 - (int)((a + (b>>1))/b);
#if OUTPUT_NORM_ACT_STATS
fprintf(f, " %6d", *(x->mb_norm_activity_ptr));
fprintf(f, " %6d", *(x->mb_activity_ptr));
#endif
// Increment activity map pointers
x->mb_activity_ptr++;
x->mb_norm_activity_ptr++;
}
#if OUTPUT_NORM_ACT_STATS
@@ -217,33 +256,44 @@ static void calc_norm_activity( VP8_COMP *cpi, MACROBLOCK *x )
#endif
}
#endif
// Loop through all MBs. Note activity of each, average activity and
// calculate a normalized activity for each
static void build_activity_map( VP8_COMP *cpi )
{
MACROBLOCK *const x = & cpi->mb;
MACROBLOCKD *xd = &x->e_mbd;
VP8_COMMON *const cm = & cpi->common;
#if ALT_ACT_MEASURE
YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
int recon_yoffset;
int recon_y_stride = new_yv12->y_stride;
#endif
int mb_row, mb_col;
unsigned int mb_activity;
INT64 activity_sum = 0;
// Initialise source buffer pointer
x->src = *cpi->Source;
// Set pointer to start of activity map
x->mb_activity_ptr = cpi->mb_activity_map;
// for each macroblock row in image
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{
#if ALT_ACT_MEASURE
// reset above block coeffs
xd->up_available = (mb_row != 0);
recon_yoffset = (mb_row * recon_y_stride * 16);
#endif
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
#if ALT_ACT_MEASURE
xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
xd->left_available = (mb_col != 0);
recon_yoffset += 16;
#endif
// measure activity
mb_activity = mb_activity_measure( cpi, x );
mb_activity = mb_activity_measure( cpi, x, mb_row, mb_col );
// Keep frame sum
activity_sum += mb_activity;
@@ -258,49 +308,50 @@ static void build_activity_map( VP8_COMP *cpi )
x->src.y_buffer += 16;
}
// adjust to the next row of mbs
x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
#if ALT_ACT_MEASURE
//extend the recon for intra prediction
vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16,
xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
#endif
}
// Calculate an "average" MB activity
calc_av_activity(cpi, activity_sum);
// Calculate a normalized activity number of each mb
calc_norm_activity( cpi, x );
#if USE_ACT_INDEX
// Calculate an activity index number of each mb
calc_activity_index( cpi, x );
#endif
}
// Activity masking based on Tim T's original code
// Macroblock activity masking
void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
unsigned int a;
unsigned int b;
unsigned int act = *(x->mb_activity_ptr);
#if USE_ACT_INDEX
x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
x->errorperbit = x->rdmult/x->rddiv;
#else
INT64 a;
INT64 b;
INT64 act = *(x->mb_activity_ptr);
// Apply the masking to the RD multiplier.
a = act + 2*cpi->activity_avg;
b = 2*act + cpi->activity_avg;
a = act + (2*cpi->activity_avg);
b = (2*act) + cpi->activity_avg;
//tmp = (unsigned int)(((INT64)tmp*b + (a>>1))/a);
x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
x->errorperbit = x->rdmult/x->rddiv;
// For now no zbin adjustment on mode choice
x->act_zbin_adj = 0;
}
#endif
// Stub function to use a normalized activity measure stored at mb level.
void vp8_norm_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
int norm_act;
norm_act = *(x->mb_norm_activity_ptr);
if (norm_act > 0)
x->rdmult = norm_act * (x->rdmult);
else
x->rdmult = -(x->rdmult / norm_act);
// For now no zbin adjustment on mode choice
x->act_zbin_adj = 0;
// Activity based Zbin adjustment
adjust_act_zbin(cpi, x);
}
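/* Note (illustrative, not part of the change): the non-index branch above
 * scales rdmult by the rounded ratio b/a = (2*act + avg) / (act + 2*avg),
 * which is bounded in (1/2, 2): act == avg leaves rdmult unchanged, very
 * busy blocks tend toward doubling lambda (coarser quantization) and very
 * flat blocks toward halving it. */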
static
@@ -356,7 +407,6 @@ void encode_mb_row(VP8_COMP *cpi,
// Set the mb activity pointer to the start of the row.
x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
x->mb_norm_activity_ptr = &cpi->mb_norm_activity_map[map_index];
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -476,7 +526,6 @@ void encode_mb_row(VP8_COMP *cpi,
// Increment the activity mask pointers.
x->mb_activity_ptr++;
x->mb_norm_activity_ptr++;
/* save the block info */
for (i = 0; i < 16; i++)
@@ -525,6 +574,92 @@ void encode_mb_row(VP8_COMP *cpi,
#endif
}
void init_encode_frame_mb_context(VP8_COMP *cpi)
{
MACROBLOCK *const x = & cpi->mb;
VP8_COMMON *const cm = & cpi->common;
MACROBLOCKD *const xd = & x->e_mbd;
// GF active flags data structure
x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
// Activity map pointer
x->mb_activity_ptr = cpi->mb_activity_map;
x->vector_range = 32;
x->act_zbin_adj = 0;
x->partition_info = x->pi;
xd->mode_info_context = cm->mi;
xd->mode_info_stride = cm->mode_info_stride;
xd->frame_type = cm->frame_type;
xd->frames_since_golden = cm->frames_since_golden;
xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
// reset intra mode contexts
if (cm->frame_type == KEY_FRAME)
vp8_init_mbmode_probs(cm);
// Copy data over into macro block data structures.
x->src = * cpi->Source;
xd->pre = cm->yv12_fb[cm->lst_fb_idx];
xd->dst = cm->yv12_fb[cm->new_fb_idx];
// set up frame for intra coded blocks
vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
vp8_build_block_offsets(x);
vp8_setup_block_dptrs(&x->e_mbd);
vp8_setup_block_ptrs(x);
xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_context->mbmi.uv_mode = DC_PRED;
xd->left_context = &cm->left_context;
vp8_zero(cpi->count_mb_ref_frame_usage)
vp8_zero(cpi->ymode_count)
vp8_zero(cpi->uv_mode_count)
x->mvc = cm->fc.mvc;
vpx_memset(cm->above_context, 0,
sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
xd->ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded);
// Special case treatment when GF and ARF are not sensible options for reference
if (cpi->ref_frame_flags == VP8_LAST_FLAG)
{
xd->ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_zero(255);
xd->ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(255)
+ vp8_cost_zero(128);
xd->ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(255)
+ vp8_cost_one(128);
}
else
{
xd->ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_zero(cpi->prob_last_coded);
xd->ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(cpi->prob_last_coded)
+ vp8_cost_zero(cpi->prob_gf_coded);
xd->ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(cpi->prob_last_coded)
+ vp8_cost_one(cpi->prob_gf_coded);
}
}
void vp8_encode_frame(VP8_COMP *cpi)
{
int mb_row;
@@ -536,6 +671,17 @@ void vp8_encode_frame(VP8_COMP *cpi)
int segment_counts[MAX_MB_SEGMENTS];
int totalrate;
vpx_memset(segment_counts, 0, sizeof(segment_counts));
totalrate = 0;
if (cpi->compressor_speed == 2)
{
if (cpi->oxcf.cpu_used < 0)
cpi->Speed = -(cpi->oxcf.cpu_used);
else
vp8_auto_select_speed(cpi);
}
// Functions setup for all frame types so we can use MC in AltRef
if (cm->mcomp_filter_type == SIXTAP)
{
@@ -560,10 +706,6 @@ void vp8_encode_frame(VP8_COMP *cpi)
&cpi->common.rtcd.subpix, bilinear16x16);
}
x->gf_active_ptr = (signed char *)cpi->gf_active_flags; // Point to base of GF active flags data structure
x->vector_range = 32;
// Reset frame count of inter 0,0 motion vector useage.
cpi->inter_zz_count = 0;
@@ -574,89 +716,34 @@ void vp8_encode_frame(VP8_COMP *cpi)
cpi->skip_true_count = 0;
cpi->skip_false_count = 0;
x->act_zbin_adj = 0;
#if 0
// Experimental code
cpi->frame_distortion = 0;
cpi->last_mb_distortion = 0;
#endif
totalrate = 0;
x->partition_info = x->pi;
xd->mode_info_context = cm->mi;
xd->mode_info_stride = cm->mode_info_stride;
xd->frame_type = cm->frame_type;
xd->frames_since_golden = cm->frames_since_golden;
xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
vp8_zero(cpi->MVcount);
// vp8_zero( Contexts)
vp8_zero(cpi->coef_counts);
// reset intra mode contexts
if (cm->frame_type == KEY_FRAME)
vp8_init_mbmode_probs(cm);
vp8cx_frame_init_quantizer(cpi);
if (cpi->compressor_speed == 2)
{
if (cpi->oxcf.cpu_used < 0)
cpi->Speed = -(cpi->oxcf.cpu_used);
else
vp8_auto_select_speed(cpi);
}
vp8_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q);
vp8cx_initialize_me_consts(cpi, cm->base_qindex);
// Copy data over into macro block data structures.
x->src = * cpi->Source;
xd->pre = cm->yv12_fb[cm->lst_fb_idx];
xd->dst = cm->yv12_fb[cm->new_fb_idx];
// set up new frame for intra coded blocks
vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
vp8_build_block_offsets(x);
vp8_setup_block_dptrs(&x->e_mbd);
vp8_setup_block_ptrs(x);
xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_context->mbmi.uv_mode = DC_PRED;
xd->left_context = &cm->left_context;
vp8_zero(cpi->count_mb_ref_frame_usage)
vp8_zero(cpi->ymode_count)
vp8_zero(cpi->uv_mode_count)
x->mvc = cm->fc.mvc;
vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
{
if(1)
{
// Build a frame level activity map
build_activity_map(cpi);
}
// Initialize encode frame context.
init_encode_frame_mb_context(cpi);
// Reset various MB pointers.
x->src = *cpi->Source;
x->mb_activity_ptr = cpi->mb_activity_map;
x->mb_norm_activity_ptr = cpi->mb_norm_activity_map;
// Build a frame level activity map
build_activity_map(cpi);
}
// re-initialize encode frame context.
init_encode_frame_mb_context(cpi);
{
struct vpx_usec_timer emr_timer;
vpx_usec_timer_start(&emr_timer);
@@ -997,99 +1084,45 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
// Experimental stub function to create a per MB zbin adjustment based on
// some previously calculated measure of MB activity.
void adjust_act_zbin( VP8_COMP *cpi, int rate, MACROBLOCK *x )
static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x )
{
INT64 act;
#if USE_ACT_INDEX
x->act_zbin_adj = *(x->mb_activity_ptr);
#else
INT64 a;
INT64 b;
INT64 act = *(x->mb_activity_ptr);
// Read activity from the map
act = (INT64)(*(x->mb_activity_ptr));
// Calculate a zbin adjustment for this mb
// Apply the masking to the RD multiplier.
a = act + 4*cpi->activity_avg;
b = 4*act + cpi->activity_avg;
if ( b > a )
//x->act_zbin_adj = (char)((b * 8) / a) - 8;
x->act_zbin_adj = 8;
if ( act > cpi->activity_avg )
x->act_zbin_adj = (int)(((INT64)b + (a>>1))/a) - 1;
else
x->act_zbin_adj = 0;
// Tmp force to 0 to disable.
x->act_zbin_adj = 0;
x->act_zbin_adj = 1 - (int)(((INT64)a + (b>>1))/b);
#endif
}
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
{
int Error4x4, Error16x16;
int rate4x4, rate16x16, rateuv;
int dist4x4, dist16x16, distuv;
int rate = 0;
int rate4x4_tokenonly = 0;
int rate16x16_tokenonly = 0;
int rateuv_tokenonly = 0;
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
int rate;
if (cpi->sf.RD && cpi->compressor_speed != 2)
{
vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
rate += rateuv;
Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);
Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, Error16x16);
rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16;
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
{
adjust_act_zbin( cpi, rate, x );
vp8_update_zbin_extra(cpi, x);
}
}
vp8_rd_pick_intra_mode(cpi, x, &rate);
else
vp8_pick_intra_mode(cpi, x, &rate);
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
{
int rate2, best_distortion;
MB_PREDICTION_MODE mode, best_mode = DC_PRED;
int this_rd;
Error16x16 = INT_MAX;
vp8_pick_intra_mbuv_mode(x);
for (mode = DC_PRED; mode <= TM_PRED; mode ++)
{
int distortion2;
x->e_mbd.mode_info_context->mbmi.mode = mode;
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
(&x->e_mbd);
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16);
rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode];
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
if (Error16x16 > this_rd)
{
Error16x16 = this_rd;
best_mode = mode;
best_distortion = distortion2;
}
}
x->e_mbd.mode_info_context->mbmi.mode = best_mode;
Error4x4 = vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &best_distortion);
adjust_act_zbin( cpi, x );
vp8_update_zbin_extra(cpi, x);
}
if (Error4x4 < Error16x16)
{
x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED)
vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
}
else
{
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
}
vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
sum_intra_stats(cpi, x);
@@ -1163,7 +1196,7 @@ int vp8cx_encode_inter_macroblock
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
{
// Adjust the zbin based on this MB rate.
adjust_act_zbin( cpi, rate, x );
adjust_act_zbin( cpi, x );
}
#if 0
@@ -1193,11 +1226,10 @@ int vp8cx_encode_inter_macroblock
{
// Experimental code. Special case for gf and arf zeromv modes.
// Increase zbin size to suppress noise
cpi->zbin_mode_boost = 0;
if (cpi->zbin_mode_boost_enabled)
{
if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
cpi->zbin_mode_boost = 0;
else
if ( xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME )
{
if (xd->mode_info_context->mbmi.mode == ZEROMV)
{
@@ -1212,9 +1244,6 @@ int vp8cx_encode_inter_macroblock
cpi->zbin_mode_boost = MV_ZBIN_BOOST;
}
}
else
cpi->zbin_mode_boost = 0;
vp8_update_zbin_extra(cpi, x);
}


@@ -28,6 +28,34 @@
#define IF_RTCD(x) NULL
#endif
int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
{
int i;
int intra_pred_var = 0;
(void) cpi;
if (use_dc_pred)
{
x->e_mbd.mode_info_context->mbmi.mode = DC_PRED;
x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
}
else
{
for (i = 0; i < 16; i++)
{
x->e_mbd.block[i].bmi.as_mode = B_DC_PRED;
vp8_encode_intra4x4block(IF_RTCD(&cpi->rtcd), x, i);
}
}
intra_pred_var = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff);
return intra_pred_var;
}
void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
MACROBLOCK *x, int ib)
@@ -81,30 +109,6 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
RECON_INVOKE(&rtcd->common->recon, recon_mby)
(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
// make sure block modes are set the way we want them for context updates
for (b = 0; b < 16; b++)
{
BLOCKD *d = &x->e_mbd.block[b];
switch (x->e_mbd.mode_info_context->mbmi.mode)
{
case DC_PRED:
d->bmi.as_mode = B_DC_PRED;
break;
case V_PRED:
d->bmi.as_mode = B_VE_PRED;
break;
case H_PRED:
d->bmi.as_mode = B_HE_PRED;
break;
case TM_PRED:
d->bmi.as_mode = B_TM_PRED;
break;
default:
d->bmi.as_mode = B_DC_PRED;
break;
}
}
}
void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
@@ -124,4 +128,3 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}


@@ -13,6 +13,7 @@
#define _ENCODEINTRA_H_
#include "onyx_int.h"
int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred);
void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *, MACROBLOCK *mb);


@@ -114,8 +114,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
// Set the mb activity pointer to the start of the row.
x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
x->mb_norm_activity_ptr =
&cpi->mb_norm_activity_map[map_index];
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -230,7 +228,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
// Increment the activity mask pointers.
x->mb_activity_ptr++;
x->mb_norm_activity_ptr++;
/* save the block info */
for (i = 0; i < 16; i++)


@@ -81,35 +81,6 @@ static const int cq_level[QINDEX_RANGE] =
static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame);
static int encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
{
int i;
int intra_pred_var = 0;
(void) cpi;
if (use_dc_pred)
{
x->e_mbd.mode_info_context->mbmi.mode = DC_PRED;
x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
}
else
{
for (i = 0; i < 16; i++)
{
x->e_mbd.block[i].bmi.as_mode = B_DC_PRED;
vp8_encode_intra4x4block(IF_RTCD(&cpi->rtcd), x, i);
}
}
intra_pred_var = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff);
return intra_pred_var;
}
// Resets the first pass file to the given position using a relative seek from the current position
static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position)
{
@@ -243,25 +214,11 @@ static int frame_max_bits(VP8_COMP *cpi)
int max_bits;
// For CBR we need to also consider buffer fullness.
// If we are running below the optimal level then we need to gradually tighten up on max_bits.
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level);
// For CBR base this on the target average bits per frame plus the maximum section rate passed in by the user
max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
// If our buffer is below the optimum level
if (buffer_fullness_ratio < 1.0)
{
// The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4.
int min_max_bits = ((cpi->av_per_frame_bandwidth >> 2) < (max_bits >> 2)) ? cpi->av_per_frame_bandwidth >> 2 : max_bits >> 2;
max_bits = (int)(max_bits * buffer_fullness_ratio);
if (max_bits < min_max_bits)
max_bits = min_max_bits; // Lowest value we will set ... which should allow the buffer to refill.
}
max_bits = 2 * cpi->av_per_frame_bandwidth;
max_bits -= cpi->buffered_av_per_frame_bandwidth;
max_bits *= ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0);
}
// VBR
else
@@ -278,6 +235,45 @@ static int frame_max_bits(VP8_COMP *cpi)
}
static int gf_group_max_bits(VP8_COMP *cpi)
{
// Max allocation for a golden frame group
int max_bits;
// For CBR we need to also consider buffer fullness.
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
max_bits = cpi->av_per_frame_bandwidth * cpi->baseline_gf_interval;
if (max_bits > cpi->oxcf.optimal_buffer_level)
{
max_bits -= cpi->oxcf.optimal_buffer_level;
max_bits += cpi->buffer_level;
}
else
{
max_bits -= (cpi->buffered_av_per_frame_bandwidth
- cpi->av_per_frame_bandwidth)
* cpi->baseline_gf_interval;
}
max_bits *= ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0);
}
else
{
// For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user
max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
max_bits *= cpi->baseline_gf_interval;
}
// Trap case where we are out of bits
if (max_bits < 0)
max_bits = 0;
return max_bits;
}
static void output_stats(const VP8_COMP *cpi,
struct vpx_codec_pkt_list *pktlist,
FIRSTPASS_STATS *stats)
@@ -582,7 +578,7 @@ void vp8_first_pass(VP8_COMP *cpi)
xd->left_available = (mb_col != 0);
// do intra 16x16 prediction
this_error = encode_intra(cpi, x, use_dc_pred);
this_error = vp8_encode_intra(cpi, x, use_dc_pred);
// "intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame)
// We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate for the mv.
@@ -1362,7 +1358,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
double abs_mv_in_out_accumulator = 0.0;
double mod_err_per_mb_accumulator = 0.0;
int max_bits = frame_max_bits(cpi); // Max for a single frame
int max_group_bits;
unsigned int allow_alt_ref =
cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
@@ -1715,8 +1711,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.gf_group_bits = (cpi->twopass.gf_group_bits < 0) ? 0 : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits;
// Clip cpi->twopass.gf_group_bits based on user supplied data rate variability limit (cpi->oxcf.two_pass_vbrmax_section)
if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval)
cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval;
max_group_bits = gf_group_max_bits(cpi);
if (cpi->twopass.gf_group_bits > max_group_bits)
cpi->twopass.gf_group_bits = max_group_bits;
// Reset the file position
reset_fpf_position(cpi, start_pos);
@@ -1725,14 +1722,15 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->twopass.modified_error_used += gf_group_err;
// Assign bits to the arf or gf.
{
for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) {
int Boost;
int frames_in_section;
int allocation_chunks;
int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
int gf_bits;
// For ARF frames
if (cpi->source_alt_ref_pending)
if (cpi->source_alt_ref_pending && i == 0)
{
Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100);
//Boost += (cpi->baseline_gf_interval * 25);
@@ -1771,7 +1769,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
// Calculate the number of bits to be spent on the gf or arf based on the boost number
cpi->twopass.gf_bits = (int)((double)Boost * (cpi->twopass.gf_group_bits / (double)allocation_chunks));
gf_bits = (int)((double)Boost * (cpi->twopass.gf_group_bits / (double)allocation_chunks));
// If the frame that is to be boosted is simpler than the average for
// the gf/arf group then use an alternative calculation
@@ -1789,9 +1787,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits /
(double)allocation_chunks));
if (cpi->twopass.gf_bits > alt_gf_bits)
if (gf_bits > alt_gf_bits)
{
cpi->twopass.gf_bits = alt_gf_bits;
gf_bits = alt_gf_bits;
}
}
// Else if it is harder than other frames in the group make sure it at
@@ -1804,23 +1802,29 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
mod_frame_err /
DOUBLE_DIVIDE_CHECK((double)cpi->twopass.kf_group_error_left));
if (alt_gf_bits > cpi->twopass.gf_bits)
if (alt_gf_bits > gf_bits)
{
cpi->twopass.gf_bits = alt_gf_bits;
gf_bits = alt_gf_bits;
}
}
// Apply an additional limit for CBR
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
{
if (cpi->twopass.gf_bits > (cpi->buffer_level >> 1))
cpi->twopass.gf_bits = cpi->buffer_level >> 1;
}
// Dont allow a negative value for gf_bits
if (cpi->twopass.gf_bits < 0)
cpi->twopass.gf_bits = 0;
if (gf_bits < 0)
gf_bits = 0;
gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame
if (i == 0)
{
cpi->twopass.gf_bits = gf_bits;
}
if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)))
{
cpi->per_frame_bandwidth = gf_bits; // Per frame bit target for this frame
}
}
{
// Adjust KF group bits and error remaining
cpi->twopass.kf_group_error_left -= gf_group_err;
cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits;
@@ -1835,7 +1839,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
else
cpi->twopass.gf_group_error_left = gf_group_err;
cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits;
cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth;
if (cpi->twopass.gf_group_bits < 0)
cpi->twopass.gf_group_bits = 0;
@@ -1851,13 +1855,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
else
cpi->twopass.mid_gf_extra_bits = 0;
cpi->twopass.gf_bits += cpi->min_frame_bandwidth; // Add in minimum for a frame
}
if (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) // Normal GF and not a KF
{
cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for this frame
}
// Adjustment to estimate_max_q based on a measure of complexity of the section
@@ -1907,12 +1904,6 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
int max_bits = frame_max_bits(cpi); // Max for a single frame
// The final few frames have special treatment
if (cpi->frames_till_gf_update_due >= (int)(cpi->twopass.total_stats->count - cpi->common.current_video_frame))
{
cpi->twopass.gf_group_bits = (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0;
}
// Calculate modified prediction error used in bit allocation
modified_err = calculate_modified_err(cpi, this_frame);
@@ -2014,22 +2005,10 @@ void vp8_second_pass(VP8_COMP *cpi)
if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))
{
// Assign a standard frames worth of bits from those allocated to the GF group
int bak = cpi->per_frame_bandwidth;
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
assign_std_frame_bits(cpi, &this_frame_copy);
// If appropriate (we are switching into ARF active but it was not previously active) apply a boost for the gf at the start of the group.
//if ( !cpi->source_alt_ref_active && (cpi->gfu_boost > 150) )
if (FALSE)
{
int extra_bits;
int pct_extra = (cpi->gfu_boost - 100) / 50;
pct_extra = (pct_extra > 20) ? 20 : pct_extra;
extra_bits = (cpi->twopass.gf_group_bits * pct_extra) / 100;
cpi->twopass.gf_group_bits -= extra_bits;
cpi->per_frame_bandwidth += extra_bits;
}
cpi->per_frame_bandwidth = bak;
}
}


@@ -67,9 +67,6 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;


@@ -1546,6 +1546,7 @@ static void init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
cpi->buffered_av_per_frame_bandwidth = cpi->av_per_frame_bandwidth;
cpi->total_actual_bits = 0;
cpi->total_target_vs_actual = 0;
@@ -1641,7 +1642,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
break;
}
if (cpi->pass == 0)
if (cpi->pass == 0 && cpi->oxcf.end_usage != USAGE_STREAM_FROM_SERVER)
cpi->auto_worst_q = 1;
cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
@@ -3528,7 +3529,8 @@ static void encode_frame_to_data_rate
// For CBR if the buffer reaches its maximum level then we can no longer
// save up bits for later frames so we might as well use them up
// on the current frame.
if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
if (cpi->pass == 2
&& (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
(cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) && cpi->buffered_mode)
{
int Adjustment = cpi->active_worst_quality / 4; // Max adjustment is 1/4
@@ -3619,6 +3621,9 @@ static void encode_frame_to_data_rate
}
else
{
if(cpi->pass != 2)
Q = cpi->avg_frame_qindex;
cpi->active_best_quality = inter_minq[Q];
// For the constant/constrained quality mode we dont want
@@ -3931,15 +3936,16 @@ static void encode_frame_to_data_rate
(cpi->active_worst_quality < cpi->worst_quality) &&
(cpi->projected_frame_size > frame_over_shoot_limit))
{
int over_size_percent = ((cpi->projected_frame_size - frame_over_shoot_limit) * 100) / frame_over_shoot_limit;
/* step down active_worst_quality such that the corresponding
* active_best_quality will be equal to the current
* active_worst_quality + 1
*/
int i;
// If so is there any scope for relaxing it
while ((cpi->active_worst_quality < cpi->worst_quality) && (over_size_percent > 0))
{
cpi->active_worst_quality++;
top_index = cpi->active_worst_quality;
over_size_percent = (int)(over_size_percent * 0.96); // Assume 1 qstep = about 4% on frame size.
}
for(i=cpi->active_worst_quality; i<cpi->worst_quality; i++)
if(inter_minq[i] >= cpi->active_worst_quality + 1)
break;
cpi->active_worst_quality = i;
// If we have updated the active max Q do not call vp8_update_rate_correction_factors() this loop.
active_worst_qchanged = TRUE;
@@ -4327,10 +4333,9 @@ static void encode_frame_to_data_rate
// Update the buffer level variable.
// Non-viewable frames are a special case and are treated as pure overhead.
if ( !cm->show_frame )
cpi->bits_off_target -= cpi->projected_frame_size;
else
cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size;
if ( cm->show_frame )
cpi->bits_off_target += cpi->av_per_frame_bandwidth;
cpi->bits_off_target -= cpi->projected_frame_size;
// Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass.
cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4;
@@ -4344,7 +4349,33 @@ static void encode_frame_to_data_rate
// Debug stats
cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size);
cpi->buffer_level = cpi->bits_off_target;
// Update the buffered average bitrate
{
long long numerator;
numerator = cpi->oxcf.maximum_buffer_size
- cpi->buffered_av_per_frame_bandwidth
+ cpi->projected_frame_size;
numerator *= cpi->buffered_av_per_frame_bandwidth;
cpi->buffered_av_per_frame_bandwidth = numerator
/ cpi->oxcf.maximum_buffer_size;
}
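/* Note (illustrative, not part of the change): the update above is a leaky
 * integrator. With m = maximum_buffer_size it computes
 *   avg' = avg * (m - avg + projected) / m = avg + avg * (projected - avg) / m,
 * so the running average drifts toward the actual frame size at a rate
 * proportional to avg / m. */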
{
long long tmp = (long long)cpi->buffered_av_per_frame_bandwidth
* cpi->oxcf.maximum_buffer_size
/ cpi->av_per_frame_bandwidth;
cpi->buffer_level = cpi->oxcf.maximum_buffer_size
- tmp
+ cpi->oxcf.optimal_buffer_level;
}
// Accumulate overshoot error.
cpi->accumulated_overshoot +=
(cpi->projected_frame_size > cpi->av_per_frame_bandwidth)
? cpi->projected_frame_size - cpi->av_per_frame_bandwidth
: 0;
// Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames
if (cm->frame_type == KEY_FRAME)


@@ -47,8 +47,8 @@
#define MIN_THRESHMULT 32
#define MAX_THRESHMULT 512
#define GF_ZEROMV_ZBIN_BOOST 24
#define LF_ZEROMV_ZBIN_BOOST 12
#define GF_ZEROMV_ZBIN_BOOST 12
#define LF_ZEROMV_ZBIN_BOOST 6
#define MV_ZBIN_BOOST 4
#define ZBIN_OQ_MAX 192
@@ -351,6 +351,10 @@ typedef struct VP8_COMP
int per_frame_bandwidth; // Current section per frame bandwidth target
int av_per_frame_bandwidth; // Average frame size target for clip
int min_frame_bandwidth; // Minimum allocation that should be used for any frame
int buffered_av_per_frame_bandwidth; // Average bitrate over the last buffer
int buffered_av_per_frame_bandwidth_rem; // Average bitrate remainder
int accumulated_overshoot; // Accumulated # of bits spent > target
int inter_frame_target;
double output_frame_rate;
long long last_time_stamp_seen;

View File

@ -43,7 +43,6 @@ extern const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES];
extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
extern unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
extern unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
extern int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *best_ref_mv, int best_rd, int *, int *, int *, int, int *mvcost[2], int, int fullpixel);
extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
@ -98,37 +97,6 @@ static int get_inter_mbpred_error(MACROBLOCK *mb,
}
unsigned int vp8_get16x16pred_error_c
(
const unsigned char *src_ptr,
int src_stride,
const unsigned char *ref_ptr,
int ref_stride
)
{
unsigned pred_error = 0;
int i, j;
int sum = 0;
for (i = 0; i < 16; i++)
{
int diff;
for (j = 0; j < 16; j++)
{
diff = src_ptr[j] - ref_ptr[j];
sum += diff;
pred_error += diff * diff;
}
src_ptr += src_stride;
ref_ptr += ref_stride;
}
pred_error -= sum * sum / 256;
return pred_error;
}
unsigned int vp8_get4x4sse_cs_c
(
@ -172,8 +140,7 @@ static int pick_intra4x4block(
MACROBLOCK *x,
int ib,
B_PREDICTION_MODE *best_mode,
B_PREDICTION_MODE above,
B_PREDICTION_MODE left,
unsigned int *mode_costs,
int *bestrate,
int *bestdistortion)
@ -185,16 +152,6 @@ static int pick_intra4x4block(
int best_rd = INT_MAX; // 1<<30
int rate;
int distortion;
unsigned int *mode_costs;
if (x->e_mbd.frame_type == KEY_FRAME)
{
mode_costs = x->bmode_costs[above][left];
}
else
{
mode_costs = x->inter_bmode_costs;
}
for (mode = B_DC_PRED; mode <= B_HE_PRED /*B_HU_PRED*/; mode++)
{
@ -221,7 +178,7 @@ static int pick_intra4x4block(
}
int vp8_pick_intra4x4mby_modes
static int pick_intra4x4mby_modes
(
const VP8_ENCODER_RTCD *rtcd,
MACROBLOCK *mb,
@ -234,20 +191,30 @@ int vp8_pick_intra4x4mby_modes
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int error;
int distortion = 0;
unsigned int *bmode_costs;
vp8_intra_prediction_down_copy(xd);
bmode_costs = mb->inter_bmode_costs;
for (i = 0; i < 16; i++)
{
MODE_INFO *const mic = xd->mode_info_context;
const int mis = xd->mode_info_stride;
const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
const B_PREDICTION_MODE L = left_block_mode(mic, i);
B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(d);
pick_intra4x4block(rtcd, mb, i, &best_mode, A, L, &r, &d);
if (mb->e_mbd.frame_type == KEY_FRAME)
{
const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
const B_PREDICTION_MODE L = left_block_mode(mic, i);
bmode_costs = mb->bmode_costs[A][L];
}
pick_intra4x4block(rtcd, mb, i, &best_mode, bmode_costs, &r, &d);
cost += r;
distortion += d;
@ -275,7 +242,7 @@ int vp8_pick_intra4x4mby_modes
return error;
}
void vp8_pick_intra_mbuv_mode(MACROBLOCK *mb)
static void pick_intra_mbuv_mode(MACROBLOCK *mb)
{
MACROBLOCKD *x = &mb->e_mbd;
@ -443,26 +410,23 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
BLOCK *b = &x->block[0];
BLOCKD *d = &x->e_mbd.block[0];
MACROBLOCKD *xd = &x->e_mbd;
union b_mode_info best_bmodes[16];
MB_MODE_INFO best_mbmode;
int_mv best_ref_mv;
int_mv mode_mv[MB_MODE_COUNT];
MB_PREDICTION_MODE this_mode;
int num00;
int i;
int mdcounts[4];
int best_rd = INT_MAX; // 1 << 30;
int best_intra_rd = INT_MAX;
int mode_index;
int ref_frame_cost[MAX_REF_FRAMES];
int rate;
int rate2;
int distortion2;
int bestsme;
//int all_rds[MAX_MODES]; // Experimental debug code.
int best_mode_index = 0;
unsigned int sse = INT_MAX;
unsigned int sse = INT_MAX, best_sse = INT_MAX;
int_mv mvp;
int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
@ -485,7 +449,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
vpx_memset(nearest_mv, 0, sizeof(nearest_mv));
vpx_memset(near_mv, 0, sizeof(near_mv));
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
vpx_memset(&best_bmodes, 0, sizeof(best_bmodes));
// set up all the refframe dependent pointers.
@ -536,32 +499,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
*returnintra = INT_MAX;
x->skip = 0;
ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded);
// Special case treatment when GF and ARF are not sensible options for reference
if (cpi->ref_frame_flags == VP8_LAST_FLAG)
{
ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_zero(255);
ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(255)
+ vp8_cost_zero(128);
ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(255)
+ vp8_cost_one(128);
}
else
{
ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_zero(cpi->prob_last_coded);
ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(cpi->prob_last_coded)
+ vp8_cost_zero(cpi->prob_gf_coded);
ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(cpi->prob_last_coded)
+ vp8_cost_one(cpi->prob_gf_coded);
}
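The deleted block above recomputed the reference-frame signaling cost for every macroblock; both pickers now read a shared x->e_mbd.ref_frame_cost[] table instead (see the later hunks). A sketch of the underlying cost tree, using a continuous approximation of the vp8_prob_cost[] table (the helper names are illustrative; costs are in 1/256-bit units, and prob must lie strictly between 0 and 256):

#include <math.h>

/* ~vp8_cost_zero()/vp8_cost_one(): the price of coding a 0 or 1 bit
 * whose probability-of-zero is prob/256. */
static int cost0(int prob) { return (int)(-256.0 * log2(prob / 256.0)); }
static int cost1(int prob) { return (int)(-256.0 * log2((256 - prob) / 256.0)); }

/* Reference frames form a binary tree: intra vs. inter, then last vs.
 * not-last, then golden vs. altref. */
static void ref_frame_costs(int prob_intra, int prob_last, int prob_gf,
                            int cost[4])
{
    cost[0] = cost0(prob_intra);                                     /* INTRA  */
    cost[1] = cost1(prob_intra) + cost0(prob_last);                  /* LAST   */
    cost[2] = cost1(prob_intra) + cost1(prob_last) + cost0(prob_gf); /* GOLDEN */
    cost[3] = cost1(prob_intra) + cost1(prob_last) + cost1(prob_gf); /* ALTREF */
}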
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
// if we encode a new mv this is important
@ -613,7 +550,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
// Work out the cost associated with selecting the reference frame
frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
frame_cost =
x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
rate2 += frame_cost;
// everything but intra
@ -659,10 +597,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
switch (this_mode)
{
case B_PRED:
// Pass best so far to vp8_pick_intra4x4mby_modes to use as breakout
distortion2 = *returndistortion;
vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x,
&rate, &distortion2);
// Pass best so far to pick_intra4x4mby_modes to use as breakout
distortion2 = best_sse;
pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate, &distortion2);
if (distortion2 == INT_MAX)
{
@ -672,9 +609,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
{
rate2 += rate;
distortion2 = VARIANCE_INVOKE
(&cpi->rtcd.variance, get16x16prederror)(
(&cpi->rtcd.variance, var16x16)(
x->src.y_buffer, x->src.y_stride,
x->e_mbd.predictor, 16);
x->e_mbd.predictor, 16, &sse);
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
if (this_rd < best_intra_rd)
@ -697,7 +634,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
case TM_PRED:
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
(&x->e_mbd);
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16);
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
(x->src.y_buffer, x->src.y_stride,
x->e_mbd.predictor, 16, &sse);
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
@ -886,15 +825,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
*returnrate = rate2;
*returndistortion = distortion2;
best_sse = sse;
best_rd = this_rd;
vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
if (this_mode == B_PRED)
for (i = 0; i < 16; i++)
{
best_bmodes[i].as_mode = x->e_mbd.block[i].bmi.as_mode;
}
// Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time
cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
@ -956,15 +890,52 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
if (best_mbmode.mode <= B_PRED)
{
/* set mode_info_context->mbmi.uv_mode */
vp8_pick_intra_mbuv_mode(x);
pick_intra_mbuv_mode(x);
}
if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED)
{
for (i = 0; i < 16; i++)
{
x->e_mbd.block[i].bmi.as_mode = best_bmodes[i].as_mode;
}
}
update_mvcount(cpi, &x->e_mbd, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]);
}
void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
{
int error4x4, error16x16 = INT_MAX;
int rate, best_rate = 0, distortion, best_sse;
MB_PREDICTION_MODE mode, best_mode = DC_PRED;
int this_rd;
unsigned int sse;
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
pick_intra_mbuv_mode(x);
for (mode = DC_PRED; mode <= TM_PRED; mode ++)
{
x->e_mbd.mode_info_context->mbmi.mode = mode;
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
(&x->e_mbd);
distortion = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, &sse);
rate = x->mbmode_cost[x->e_mbd.frame_type][mode];
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
if (error16x16 > this_rd)
{
error16x16 = this_rd;
best_mode = mode;
best_sse = sse;
best_rate = rate;
}
}
x->e_mbd.mode_info_context->mbmi.mode = best_mode;
error4x4 = pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate,
&best_sse);
if (error4x4 < error16x16)
{
x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
best_rate = rate;
}
*rate_ = best_rate;
}
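Both new intra pickers share a breakout pattern: the 16x16 winner's score is handed to the 4x4 search as an upper bound, so the per-block loop can abort as soon as it cannot win. A minimal sketch of that contract (names and types are hypothetical):

#include <limits.h>

/* Stops and returns INT_MAX as soon as the running cost exceeds the
 * budget set by the best mode found so far. */
static int search_4x4(const int block_cost[16], int best_so_far)
{
    int total = 0, i;

    for (i = 0; i < 16; i++)
    {
        total += block_cost[i];
        if (total > best_so_far)
            return INT_MAX;  /* cannot beat the 16x16 mode; give up */
    }

    return total;
}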

View File

@ -14,7 +14,6 @@
#include "vpx_ports/config.h"
#include "vp8/common/onyxc_int.h"
extern int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *, MACROBLOCK *mb, int *Rate, int *Distortion);
extern void vp8_pick_intra_mbuv_mode(MACROBLOCK *mb);
extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);
#endif

View File

@ -48,9 +48,6 @@ void (*vp8_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, i
void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
unsigned int (*vp8_get8x8var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
unsigned int (*vp8_get16x16var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
// c imports
@ -88,9 +85,6 @@ extern sub_pixel_variance_function sub_pixel_variance16x8_c;
extern sub_pixel_variance_function sub_pixel_variance16x16_c;
extern unsigned int vp8_get_mb_ss_c(short *);
extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
extern unsigned int vp8_get16x16var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
// ppc
@ -149,9 +143,6 @@ void vp8_cmachine_specific_config(void)
vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ppc;
vp8_get_mb_ss = vp8_get_mb_ss_c;
vp8_get16x16pred_error = vp8_get16x16pred_error_c;
vp8_get8x8var = vp8_get8x8var_ppc;
vp8_get16x16var = vp8_get16x16var_ppc;
vp8_get4x4sse_cs = vp8_get4x4sse_cs_c;
vp8_sad16x16 = vp8_sad16x16_ppc;

View File

@ -650,10 +650,10 @@ static void calc_gf_params(VP8_COMP *cpi)
static void calc_pframe_target_size(VP8_COMP *cpi)
{
int min_frame_target;
int min_frame_target, max_frame_target;
int Adjustment;
min_frame_target = 0;
min_frame_target = 1;
if (cpi->pass == 2)
{
@ -661,10 +661,19 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
if (min_frame_target < (cpi->av_per_frame_bandwidth >> 5))
min_frame_target = cpi->av_per_frame_bandwidth >> 5;
}
else if (min_frame_target < cpi->per_frame_bandwidth / 4)
min_frame_target = cpi->per_frame_bandwidth / 4;
max_frame_target = INT_MAX;
}
else
{
if (min_frame_target < cpi->per_frame_bandwidth / 4)
min_frame_target = cpi->per_frame_bandwidth / 4;
/* Don't allow the target to completely deplete the buffer. */
max_frame_target = cpi->buffer_level + cpi->av_per_frame_bandwidth;
if(max_frame_target < min_frame_target)
max_frame_target = min_frame_target;
}
// Special alt reference frame case
if (cpi->common.refresh_alt_ref_frame)
@ -1157,6 +1166,32 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
}
}
if (cpi->pass==0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER){
/* determine the accumulated error to apply to this frame. Apply
* more of the error when we've been undershooting, less when
* we've been overshooting
*/
long long adjust;
int bitrate_error;
bitrate_error = cpi->av_per_frame_bandwidth
- cpi->buffered_av_per_frame_bandwidth;
adjust = cpi->accumulated_overshoot;
adjust *= cpi->av_per_frame_bandwidth + bitrate_error;
adjust /= cpi->oxcf.maximum_buffer_size;
if (adjust > (cpi->this_frame_target - min_frame_target))
adjust = (cpi->this_frame_target - min_frame_target);
else if (adjust < 0)
adjust = 0;
cpi->this_frame_target -= adjust;
cpi->accumulated_overshoot -= adjust;
}
if(cpi->this_frame_target > max_frame_target)
cpi->this_frame_target = max_frame_target;
}
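The new one-pass CBR branch drains accumulated overshoot gradually: each frame pays back a slice proportional to its share of the buffer, and undershooting (a positive bitrate error) repays more. A standalone sketch of that adjustment, mirroring the arithmetic above (names are illustrative; all values in bits):

static int apply_overshoot(int this_frame_target, int min_frame_target,
                           int av_bandwidth, int buffered_av,
                           long long buffer_size,
                           int *accumulated_overshoot)
{
    int bitrate_error = av_bandwidth - buffered_av;
    long long adjust = *accumulated_overshoot;

    adjust *= av_bandwidth + bitrate_error;
    adjust /= buffer_size;

    /* Never push the target below its floor, never add bits back. */
    if (adjust > this_frame_target - min_frame_target)
        adjust = this_frame_target - min_frame_target;
    else if (adjust < 0)
        adjust = 0;

    *accumulated_overshoot -= (int)adjust;
    return this_frame_target - (int)adjust;
}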

View File

@ -201,47 +201,47 @@ static int rdmult_lut[QINDEX_RANGE]=
/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE] =
{
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 6, 6,
6, 6, 6, 6, 6, 7, 7, 7,
7, 7, 7, 7, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 9, 9,
9, 9, 9, 9, 10, 10, 10, 10,
10, 10, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 13,
13, 13, 13, 13, 13, 14, 14, 14,
14, 14, 15, 15, 15, 15, 15, 15,
16, 16, 16, 16, 16, 16, 17, 17,
17, 17, 17, 17, 17, 18, 18, 18,
18, 18, 19, 19, 19, 19, 19, 19,
20, 20, 20, 21, 21, 21, 21, 22,
22, 22, 23, 23, 23, 24, 24, 24,
25, 25, 26, 26, 27, 27, 27, 28,
28, 28, 29, 29, 30, 30, 31, 31
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 10, 10,
10, 10, 10, 10, 10, 10, 11, 11,
11, 11, 11, 11, 12, 12, 12, 12,
12, 12, 13, 13, 13, 13, 14, 14
};
static int sad_per_bit4lut[QINDEX_RANGE] =
{
5, 5, 5, 5, 5, 5, 7, 7,
2, 2, 2, 2, 2, 2, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 5, 5,
5, 5, 5, 5, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 8, 8, 8,
8, 8, 8, 8, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 11, 11,
11, 11, 11, 11, 13, 13, 13, 13,
13, 13, 14, 14, 14, 14, 14, 14,
16, 16, 16, 16, 16, 16, 16, 17,
17, 17, 17, 17, 17, 19, 19, 19,
19, 19, 20, 20, 20, 20, 20, 20,
22, 22, 22, 22, 22, 22, 23, 23,
23, 23, 23, 23, 23, 25, 25, 25,
25, 25, 26, 26, 26, 26, 26, 26,
28, 28, 28, 29, 29, 29, 29, 31,
31, 31, 32, 32, 32, 34, 34, 34,
35, 35, 37, 37, 38, 38, 38, 40,
40, 40, 41, 41, 43, 43, 44, 44,
8, 8, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12,
13, 13, 13, 13, 13, 13, 13, 14,
14, 14, 14, 14, 15, 15, 15, 15,
16, 16, 16, 16, 17, 17, 17, 18,
18, 18, 19, 19, 19, 20, 20, 20,
};
void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
{
cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex]/2;
cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]/2;
cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex];
cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex];
}
@ -719,8 +719,8 @@ static int rd_pick_intra4x4block(
return best_rd;
}
int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
int *rate_y, int *Distortion, int best_rd)
static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
int *rate_y, int *Distortion, int best_rd)
{
MACROBLOCKD *const xd = &mb->e_mbd;
int i;
@ -782,11 +782,13 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
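All of these pickers rank candidates with the RDCOST macro from rdopt.h (visible in a later hunk): the rate term is weighted by RM and divided by 256 with rounding, then summed with the DM-weighted distortion. A worked instance with illustrative numbers, showing a cheap-but-distorted mode losing to a costlier, cleaner one:

#include <stdio.h>

/* As defined in vp8/encoder/rdopt.h. */
#define RDCOST(RM, DM, R, D) (((128 + (R) * (RM)) >> 8) + (DM) * (D))

int main(void)
{
    int rdmult = 300, rddiv = 1;
    int cost_a = RDCOST(rdmult, rddiv, 500, 1000); /* (128+150000)>>8 + 1000 = 1586 */
    int cost_b = RDCOST(rdmult, rddiv, 900, 400);  /* (128+270000)>>8 + 400  = 1455 */

    printf("mode A: %d, mode B: %d -> pick %s\n",
           cost_a, cost_b, cost_b < cost_a ? "B" : "A");
    return 0;
}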
int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
MACROBLOCK *x,
int *Rate,
int *rate_y,
int *Distortion)
static int rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
MACROBLOCK *x,
int *Rate,
int *rate_y,
int *Distortion)
{
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
@ -858,7 +860,7 @@ static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distort
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
void vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion)
static void rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion)
{
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
@ -1795,7 +1797,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
int distortion;
int best_rd = INT_MAX;
int best_intra_rd = INT_MAX;
int ref_frame_cost[MAX_REF_FRAMES];
int rate2, distortion2;
int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
@ -1872,36 +1873,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
x->skip = 0;
ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded);
// Special case treatment when GF and ARF are not sensible options for reference
if (cpi->ref_frame_flags == VP8_LAST_FLAG)
{
ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_zero(255);
ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(255)
+ vp8_cost_zero(128);
ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(255)
+ vp8_cost_one(128);
}
else
{
ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_zero(cpi->prob_last_coded);
ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(cpi->prob_last_coded)
+ vp8_cost_zero(cpi->prob_gf_coded);
ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded)
+ vp8_cost_one(cpi->prob_last_coded)
+ vp8_cost_one(cpi->prob_gf_coded);
}
vpx_memset(mode_mv, 0, sizeof(mode_mv));
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
@ -2024,7 +1999,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
int tmp_rd;
// Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED];
tmp_rd = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd);
tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd);
rate2 += rate;
distortion2 += distortion;
@ -2247,29 +2222,28 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
}
else if (x->encode_breakout)
{
int sum;
unsigned int sse;
unsigned int var;
int threshold = (xd->block[0].dequant[1]
* xd->block[0].dequant[1] >>4);
if(threshold < x->encode_breakout)
threshold = x->encode_breakout;
VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)
(x->src.y_buffer, x->src.y_stride,
x->e_mbd.predictor, 16, &sse, &sum);
var = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
(x->src.y_buffer, x->src.y_stride,
x->e_mbd.predictor, 16, &sse);
if (sse < threshold)
{
// Check u and v to make sure skip is ok
int sse2 = 0;
unsigned int q2dc = xd->block[24].dequant[0];
/* If there is no codeable 2nd order DC
or a very small uniform pixel change */
if (abs(sum) < (xd->block[24].dequant[0]<<2)||
((sum * sum>>8) > sse && abs(sum) <128))
if ((sse - var < ((q2dc * q2dc) >> 4)) ||
(sse / 2 > var && sse - var < 64))
{
sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
// Check u and v to make sure skip is ok
int sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
if (sse2 * 2 < threshold)
{
x->skip = 1;
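The reworked test leans on an identity of the new helper: var16x16 returns var = sse - sum*sum/256 while reporting the raw sse, so sse - var recovers the squared-mean (DC) energy that the old code derived from sum. A sketch of the predicate with explicit parentheses (names are illustrative):

/* sse - var == sum*sum/256: the energy of the uniform component. */
static int y_breakout_ok(unsigned int sse, unsigned int var,
                         unsigned int q2dc)
{
    /* No codeable 2nd-order DC: the mean error is below what the
     * (squared, 1/16-scaled) Y2 DC quantizer step can represent... */
    if (sse - var < ((q2dc * q2dc) >> 4))
        return 1;

    /* ...or the block is dominated by a small uniform change. */
    if (sse / 2 > var && sse - var < 64)
        return 1;

    return 0;
}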
@ -2319,8 +2293,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
rate2 += other_cost;
}
// Estimate the reference frame signaling cost and add it to the rolling cost variable.
rate2 += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
/* Estimate the reference frame signaling cost and add it
* to the rolling cost variable.
*/
rate2 +=
x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
if (!disable_skip)
{
@ -2384,7 +2361,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
}
other_cost += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
other_cost +=
x->e_mbd.ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
/* Calculate the final y RD estimate for this mode */
best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost),
@ -2492,3 +2470,39 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
rd_update_mvcount(cpi, x, &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame]);
}
void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
{
int error4x4, error16x16;
int rate4x4, rate16x16 = 0, rateuv;
int dist4x4, dist16x16, distuv;
int rate;
int rate4x4_tokenonly = 0;
int rate16x16_tokenonly = 0;
int rateuv_tokenonly = 0;
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
rate = rateuv;
error16x16 = rd_pick_intra16x16mby_mode(cpi, x,
&rate16x16, &rate16x16_tokenonly,
&dist16x16);
error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
&rate4x4, &rate4x4_tokenonly,
&dist4x4, error16x16);
if (error4x4 < error16x16)
{
x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
rate += rate4x4;
}
else
{
rate += rate16x16;
}
*rate_ = rate;
}

View File

@ -15,10 +15,8 @@
#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue);
extern int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion, int best_rd);
extern int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *returnrate, int *rate_to, int *returndistortion);
extern void vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_to, int *distortion);
extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);
extern void vp8_mv_pred
(

View File

@ -98,7 +98,6 @@ static void tokenize2nd_order_b
const BLOCKD *const b,
TOKENEXTRA **tp,
const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
const FRAME_TYPE frametype,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi
@ -120,9 +119,9 @@ static void tokenize2nd_order_b
{
int rc = vp8_default_zig_zag1d[c];
const int v = qcoeff_ptr[rc];
#if CONFIG_DEBUG
assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE));
#endif
t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
x = vp8_dct_value_tokens_ptr[v].Token;
}
@ -149,7 +148,6 @@ static void tokenize1st_order_b
const BLOCKD *const b,
TOKENEXTRA **tp,
const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
const FRAME_TYPE frametype,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi
@ -173,9 +171,9 @@ static void tokenize1st_order_b
{
int rc = vp8_default_zig_zag1d[c];
const int v = qcoeff_ptr[rc];
#if CONFIG_DEBUG
assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE));
#endif
t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
x = vp8_dct_value_tokens_ptr[v].Token;
}
@ -196,14 +194,11 @@ static void tokenize1st_order_b
}
static int mb_is_skippable(MACROBLOCKD *x)
static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block)
{
int has_y2_block;
int skip = 1;
int i = 0;
has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED
&& x->mode_info_context->mbmi.mode != SPLITMV);
if (has_y2_block)
{
for (i = 0; i < 16; i++)
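The refactor hoists the has_y2_block decision to the caller (B_PRED and SPLITMV have no Y2 block), which also needs it to choose the tokenizing plane type. A sketch of the shape of the skippability check, under an assumed end-of-block layout (eob[0..15] Y, eob[16..23] U/V, eob[24] Y2):

static int mb_is_skippable_sketch(const int eob[25], int has_y2_block)
{
    int skip = 1;
    int i = 0;

    if (has_y2_block)
    {
        /* With Y2, the Y DC rides in block 24, so a Y block whose only
         * entry sits in the DC slot is still skippable. */
        for (i = 0; i < 16; i++)
            skip &= (eob[i] < 2);
    }

    /* Everything else (and all blocks when there is no Y2, where
     * eob[24] stays zero) must be empty. */
    for (; i < 25; i++)
        skip &= (!eob[i]);

    return skip;
}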
@ -223,8 +218,12 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
int plane_type;
int b;
int has_y2_block;
x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x);
has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED
&& x->mode_info_context->mbmi.mode != SPLITMV);
x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block);
if (x->mode_info_context->mbmi.mb_skip_coeff)
{
cpi->skip_true_count++;
@ -241,29 +240,24 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
cpi->skip_false_count++;
#if 0
vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts));
#endif
if (x->mode_info_context->mbmi.mode == B_PRED || x->mode_info_context->mbmi.mode == SPLITMV)
plane_type = 3;
if(has_y2_block)
{
plane_type = 3;
}
else
{
tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type,
tokenize2nd_order_b(x->block + 24, t, 1,
A + vp8_block2above[24], L + vp8_block2left[24], cpi);
plane_type = 0;
}
for (b = 0; b < 16; b++)
tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type,
tokenize1st_order_b(x->block + b, t, plane_type,
A + vp8_block2above[b],
L + vp8_block2left[b], cpi);
for (b = 16; b < 24; b++)
tokenize1st_order_b(x->block + b, t, 2, x->frame_type,
tokenize1st_order_b(x->block + b, t, 2,
A + vp8_block2above[b],
L + vp8_block2left[b], cpi);
@ -352,10 +346,7 @@ void vp8_tokenize_initialize()
static __inline void stuff2nd_order_b
(
const BLOCKD *const b,
TOKENEXTRA **tp,
const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
const FRAME_TYPE frametype,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi
@ -364,9 +355,6 @@ static __inline void stuff2nd_order_b
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
(void) frametype;
(void) type;
(void) b;
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];
@ -382,10 +370,7 @@ static __inline void stuff2nd_order_b
static __inline void stuff1st_order_b
(
const BLOCKD *const b,
TOKENEXTRA **tp,
const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
const FRAME_TYPE frametype,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi
@ -394,9 +379,6 @@ static __inline void stuff1st_order_b
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
(void) frametype;
(void) type;
(void) b;
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [0] [1] [pt];
@ -411,10 +393,7 @@ static __inline void stuff1st_order_b
static __inline
void stuff1st_order_buv
(
const BLOCKD *const b,
TOKENEXTRA **tp,
const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
const FRAME_TYPE frametype,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
VP8_COMP *cpi
@ -423,9 +402,6 @@ void stuff1st_order_buv
int pt; /* near block/prev token context index */
TOKENEXTRA *t = *tp; /* store tokens starting here */
VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
(void) frametype;
(void) type;
(void) b;
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];
@ -445,17 +421,17 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
int plane_type;
int b;
stuff2nd_order_b(x->block + 24, t, 1, x->frame_type,
stuff2nd_order_b(t,
A + vp8_block2above[24], L + vp8_block2left[24], cpi);
plane_type = 0;
for (b = 0; b < 16; b++)
stuff1st_order_b(x->block + b, t, plane_type, x->frame_type,
stuff1st_order_b(t,
A + vp8_block2above[b],
L + vp8_block2left[b], cpi);
for (b = 16; b < 24; b++)
stuff1st_order_buv(x->block + b, t, 2, x->frame_type,
stuff1st_order_buv(t,
A + vp8_block2above[b],
L + vp8_block2left[b], cpi);

View File

@ -308,21 +308,6 @@ extern prototype_getmbss(vp8_variance_getmbss);
#endif
extern prototype_variance(vp8_variance_mse16x16);
#ifndef vp8_variance_get16x16prederror
#define vp8_variance_get16x16prederror vp8_get16x16pred_error_c
#endif
extern prototype_get16x16prederror(vp8_variance_get16x16prederror);
#ifndef vp8_variance_get8x8var
#define vp8_variance_get8x8var vp8_get8x8var_c
#endif
extern prototype_variance2(vp8_variance_get8x8var);
#ifndef vp8_variance_get16x16var
#define vp8_variance_get16x16var vp8_get16x16var_c
#endif
extern prototype_variance2(vp8_variance_get16x16var);
#ifndef vp8_variance_get4x4sse_cs
#define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_c
#endif
@ -376,9 +361,6 @@ typedef struct
vp8_getmbss_fn_t getmbss;
vp8_variance_fn_t mse16x16;
vp8_get16x16prederror_fn_t get16x16prederror;
vp8_variance2_fn_t get8x8var;
vp8_variance2_fn_t get16x16var;
vp8_get16x16prederror_fn_t get4x4sse_cs;
vp8_sad_multi_fn_t sad16x16x3;

View File

@ -61,40 +61,6 @@ static void variance(
}
}
unsigned int
vp8_get8x8var_c
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
)
{
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);
return (*SSE - (((*Sum) * (*Sum)) >> 6));
}
unsigned int
vp8_get16x16var_c
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *Sum
)
{
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);
return (*SSE - (((*Sum) * (*Sum)) >> 8));
}
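The shift amounts in the deleted helpers encode the pixel count: for an N-pixel block, variance = SSE - Sum*Sum/N, with N = 64 (>>6) for 8x8 and N = 256 (>>8) for 16x16. The surviving vp8_variance*_c functions compute the same quantity. A widened version of the identity (the originals rely on int arithmetic staying in range):

/* variance = SSE - Sum*Sum/N for an N = 1 << log2_n pixel block. */
static unsigned int block_variance(unsigned int sse, int sum, int log2_n)
{
    return sse - (unsigned int)(((long long)sum * sum) >> log2_n);
}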
unsigned int vp8_variance16x16_c(
const unsigned char *src_ptr,

View File

@ -31,6 +31,12 @@ extern prototype_fdct(vp8_short_fdct8x4_mmx);
#undef vp8_fdct_short8x4
#define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx
#undef vp8_fdct_fast4x4
#define vp8_fdct_fast4x4 vp8_short_fdct4x4_mmx
#undef vp8_fdct_fast8x4
#define vp8_fdct_fast8x4 vp8_short_fdct8x4_mmx
#endif
#endif

View File

@ -843,136 +843,6 @@ filter_block2d_bil_var_mmx_loop:
pop rbp
ret
;unsigned int vp8_get16x16pred_error_mmx
;(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride
;)
global sym(vp8_get16x16pred_error_mmx)
sym(vp8_get16x16pred_error_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
GET_GOT rbx
push rsi
push rdi
sub rsp, 16
; end prolog
mov rsi, arg(0) ;DWORD PTR [src_ptr]
mov rdi, arg(2) ;DWORD PTR [ref_ptr]
movsxd rax, DWORD PTR arg(1) ;[src_stride]
movsxd rdx, DWORD PTR arg(3) ;[ref_stride]
pxor mm0, mm0 ; clear mm0 for unpack
pxor mm7, mm7 ; clear mm7 for accumulating diffs
pxor mm6, mm6 ; clear mm6 for accumulating sse
mov rcx, 16
var16loop:
movq mm1, [rsi]
movq mm2, [rdi]
movq mm3, mm1
movq mm4, mm2
punpcklbw mm1, mm0
punpckhbw mm3, mm0
punpcklbw mm2, mm0
punpckhbw mm4, mm0
psubw mm1, mm2
psubw mm3, mm4
paddw mm7, mm1
pmaddwd mm1, mm1
paddw mm7, mm3
pmaddwd mm3, mm3
paddd mm6, mm1
paddd mm6, mm3
movq mm1, [rsi+8]
movq mm2, [rdi+8]
movq mm3, mm1
movq mm4, mm2
punpcklbw mm1, mm0
punpckhbw mm3, mm0
punpcklbw mm2, mm0
punpckhbw mm4, mm0
psubw mm1, mm2
psubw mm3, mm4
paddw mm7, mm1
pmaddwd mm1, mm1
paddw mm7, mm3
pmaddwd mm3, mm3
paddd mm6, mm1
paddd mm6, mm3
add rsi, rax
add rdi, rdx
sub rcx, 1
jnz var16loop
movq mm1, mm6
pxor mm6, mm6
pxor mm5, mm5
punpcklwd mm6, mm7
punpckhwd mm5, mm7
psrad mm5, 16
psrad mm6, 16
paddd mm6, mm5
movq mm2, mm1
psrlq mm1, 32
paddd mm2, mm1
movq mm7, mm6
psrlq mm6, 32
paddd mm6, mm7
movd DWORD PTR [rsp], mm6 ;Sum
movd DWORD PTR [rsp+4], mm2 ;SSE
; return (SSE-((Sum*Sum)>>8));
movsxd rdx, dword ptr [rsp]
imul rdx, rdx
sar rdx, 8
movsxd rax, dword ptr [rsp + 4]
sub rax, rdx
; begin epilog
add rsp, 16
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
;short mmx_bi_rd[4] = { 64, 64, 64, 64};

View File

@ -213,122 +213,6 @@ var16loop:
ret
;unsigned int vp8_get16x16pred_error_sse2
;(
; unsigned char *src_ptr,
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride
;)
global sym(vp8_get16x16pred_error_sse2)
sym(vp8_get16x16pred_error_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
sub rsp, 16
; end prolog
mov rsi, arg(0) ;[src_ptr]
mov rdi, arg(2) ;[ref_ptr]
movsxd rax, DWORD PTR arg(1) ;[src_stride]
movsxd rdx, DWORD PTR arg(3) ;[ref_stride]
pxor xmm0, xmm0 ; clear xmm0 for unpack
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
pxor xmm6, xmm6 ; clear xmm6 for accumulating sse
mov rcx, 16
var16peloop:
movdqu xmm1, XMMWORD PTR [rsi]
movdqu xmm2, XMMWORD PTR [rdi]
movdqa xmm3, xmm1
movdqa xmm4, xmm2
punpcklbw xmm1, xmm0
punpckhbw xmm3, xmm0
punpcklbw xmm2, xmm0
punpckhbw xmm4, xmm0
psubw xmm1, xmm2
psubw xmm3, xmm4
paddw xmm7, xmm1
pmaddwd xmm1, xmm1
paddw xmm7, xmm3
pmaddwd xmm3, xmm3
paddd xmm6, xmm1
paddd xmm6, xmm3
add rsi, rax
add rdi, rdx
sub rcx, 1
jnz var16peloop
movdqa xmm1, xmm6
pxor xmm6, xmm6
pxor xmm5, xmm5
punpcklwd xmm6, xmm7
punpckhwd xmm5, xmm7
psrad xmm5, 16
psrad xmm6, 16
paddd xmm6, xmm5
movdqa xmm2, xmm1
punpckldq xmm1, xmm0
punpckhdq xmm2, xmm0
movdqa xmm7, xmm6
paddd xmm1, xmm2
punpckldq xmm6, xmm0
punpckhdq xmm7, xmm0
paddd xmm6, xmm7
movdqa xmm2, xmm1
movdqa xmm7, xmm6
psrldq xmm1, 8
psrldq xmm6, 8
paddd xmm7, xmm6
paddd xmm1, xmm2
movd DWORD PTR [rsp], xmm7 ;Sum
movd DWORD PTR [rsp+4], xmm1 ;SSE
; return (SSE-((Sum*Sum)>>8));
movsxd rdx, dword ptr [rsp]
imul rdx, rdx
sar rdx, 8
movsxd rax, dword ptr [rsp + 4]
sub rax, rdx
; begin epilog
add rsp, 16
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;unsigned int vp8_get8x8var_sse2

View File

@ -76,43 +76,6 @@ extern void vp8_filter_block2d_bil_var_mmx
int *sum,
unsigned int *sumsquared
);
extern unsigned int vp8_get16x16pred_error_mmx
(
const unsigned char *src_ptr,
int src_stride,
const unsigned char *ref_ptr,
int ref_stride
);
unsigned int vp8_get16x16var_mmx(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *SSE,
int *SUM
)
{
unsigned int sse0, sse1, sse2, sse3, var;
int sum0, sum1, sum2, sum3, avg;
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
var = sse0 + sse1 + sse2 + sse3;
avg = sum0 + sum1 + sum2 + sum3;
*SSE = var;
*SUM = avg;
return (var - ((avg * avg) >> 8));
}
unsigned int vp8_variance4x4_mmx(

View File

@ -53,13 +53,6 @@ unsigned int vp8_get16x16var_sse2
unsigned int *SSE,
int *Sum
);
unsigned int vp8_get16x16pred_error_sse2
(
const unsigned char *src_ptr,
int src_stride,
const unsigned char *ref_ptr,
int ref_stride
);
unsigned int vp8_get8x8var_sse2
(
const unsigned char *src_ptr,

View File

@ -41,9 +41,7 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx);
extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx);
extern prototype_getmbss(vp8_get_mb_ss_mmx);
extern prototype_variance(vp8_mse16x16_mmx);
extern prototype_get16x16prederror(vp8_get16x16pred_error_mmx);
extern prototype_variance2(vp8_get8x8var_mmx);
extern prototype_variance2(vp8_get16x16var_mmx);
extern prototype_get16x16prederror(vp8_get4x4sse_cs_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
@ -110,15 +108,6 @@ extern prototype_get16x16prederror(vp8_get4x4sse_cs_mmx);
#undef vp8_variance_mse16x16
#define vp8_variance_mse16x16 vp8_mse16x16_mmx
#undef vp8_variance_get16x16prederror
#define vp8_variance_get16x16prederror vp8_get16x16pred_error_mmx
#undef vp8_variance_get8x8var
#define vp8_variance_get8x8var vp8_get8x8var_mmx
#undef vp8_variance_get16x16var
#define vp8_variance_get16x16var vp8_get16x16var_mmx
#undef vp8_variance_get4x4sse_cs
#define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_mmx
@ -148,7 +137,6 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt);
extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt);
extern prototype_getmbss(vp8_get_mb_ss_sse2);
extern prototype_variance(vp8_mse16x16_wmt);
extern prototype_get16x16prederror(vp8_get16x16pred_error_sse2);
extern prototype_variance2(vp8_get8x8var_sse2);
extern prototype_variance2(vp8_get16x16var_sse2);
@ -216,15 +204,6 @@ extern prototype_variance2(vp8_get16x16var_sse2);
#undef vp8_variance_mse16x16
#define vp8_variance_mse16x16 vp8_mse16x16_wmt
#undef vp8_variance_get16x16prederror
#define vp8_variance_get16x16prederror vp8_get16x16pred_error_sse2
#undef vp8_variance_get8x8var
#define vp8_variance_get8x8var vp8_get8x8var_sse2
#undef vp8_variance_get16x16var
#define vp8_variance_get16x16var vp8_get16x16var_sse2
#endif
#endif

View File

@ -16,7 +16,7 @@
#if HAVE_MMX
static void short_fdct8x4_mmx(short *input, short *output, int pitch)
void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
{
vp8_short_fdct4x4_mmx(input, output, pitch);
vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
@ -26,7 +26,7 @@ int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
short *qcoeff_ptr, short *dequant_ptr,
short *scan_mask, short *round_ptr,
short *quant_ptr, short *dqcoeff_ptr);
static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
{
short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
short *coeff_ptr = b->coeff;
@ -51,7 +51,7 @@ static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
}
int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
static int mbblock_error_mmx(MACROBLOCK *mb, int dc)
int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc)
{
short *coeff_ptr = mb->block[0].coeff;
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
@ -59,7 +59,7 @@ static int mbblock_error_mmx(MACROBLOCK *mb, int dc)
}
int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
static int mbuverror_mmx(MACROBLOCK *mb)
int vp8_mbuverror_mmx(MACROBLOCK *mb)
{
short *s_ptr = &mb->coeff[256];
short *d_ptr = &mb->e_mbd.dqcoeff[256];
@ -69,7 +69,7 @@ static int mbuverror_mmx(MACROBLOCK *mb)
void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
short *diff, unsigned char *predictor,
int pitch);
static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
{
unsigned char *z = *(be->base_src) + be->src;
unsigned int src_stride = be->src_stride;
@ -82,7 +82,7 @@ static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
#if HAVE_SSE2
int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
static int mbblock_error_xmm(MACROBLOCK *mb, int dc)
int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
{
short *coeff_ptr = mb->block[0].coeff;
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
@ -90,7 +90,7 @@ static int mbblock_error_xmm(MACROBLOCK *mb, int dc)
}
int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
static int mbuverror_xmm(MACROBLOCK *mb)
int vp8_mbuverror_xmm(MACROBLOCK *mb)
{
short *s_ptr = &mb->coeff[256];
short *d_ptr = &mb->e_mbd.dqcoeff[256];
@ -100,7 +100,7 @@ static int mbuverror_xmm(MACROBLOCK *mb)
void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
short *diff, unsigned char *predictor,
int pitch);
static void subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
{
unsigned char *z = *(be->base_src) + be->src;
unsigned int src_stride = be->src_stride;
@ -175,26 +175,23 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx;
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_mmx;
cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_mmx;
cpi->rtcd.variance.get8x8var = vp8_get8x8var_mmx;
cpi->rtcd.variance.get16x16var = vp8_get16x16var_mmx;
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx;
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx;
cpi->rtcd.fdct.short8x4 = short_fdct8x4_mmx;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx;
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx;
cpi->rtcd.fdct.fast8x4 = short_fdct8x4_mmx;
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
cpi->rtcd.encodemb.berr = vp8_block_error_mmx;
cpi->rtcd.encodemb.mberr = mbblock_error_mmx;
cpi->rtcd.encodemb.mbuverr = mbuverror_mmx;
cpi->rtcd.encodemb.subb = subtract_b_mmx;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_mmx;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_mmx;
cpi->rtcd.encodemb.subb = vp8_subtract_b_mmx;
cpi->rtcd.encodemb.submby = vp8_subtract_mby_mmx;
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_mmx;
/*cpi->rtcd.quantize.fastquantb = fast_quantize_b_mmx;*/
/*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/
}
#endif
@ -226,11 +223,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt;
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_sse2;
cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_sse2;
cpi->rtcd.variance.get8x8var = vp8_get8x8var_sse2;
cpi->rtcd.variance.get16x16var = vp8_get16x16var_sse2;
/* cpi->rtcd.variance.get4x4sse_cs not implemented for wmt */;
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_sse2;
@ -241,9 +233,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2 ;
cpi->rtcd.encodemb.berr = vp8_block_error_xmm;
cpi->rtcd.encodemb.mberr = mbblock_error_xmm;
cpi->rtcd.encodemb.mbuverr = mbuverror_xmm;
cpi->rtcd.encodemb.subb = subtract_b_sse2;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm;
cpi->rtcd.encodemb.subb = vp8_subtract_b_sse2;
cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2;
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2;

vpx_ports/asm_offsets.h (new file, 31 lines)
View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VPX_PORTS_ASM_OFFSETS_H
#define VPX_PORTS_ASM_OFFSETS_H
#include <stddef.h>
#define ct_assert(name,cond) \
static void assert_##name(void) UNUSED;\
static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
#if INLINE_ASM
#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val));
#define BEGIN int main(void) {
#define END return 0; }
#else
#define DEFINE(sym, val) int sym = val;
#define BEGIN
#define END
#endif
#endif /* VPX_PORTS_ASM_OFFSETS_H */
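A typical consumer of this header (a hypothetical offsets file, modeled on how the encoder uses it) emits one constant per structure field so hand-written assembly can track the C layout. With INLINE_ASM defined, each DEFINE becomes an EQU directive in the compiler's assembly output; otherwise it compiles to a plain int the build can inspect. BLOCKD here is a stand-in struct, and UNUSED normally comes from the build configuration:

#include <stddef.h>
#include "vpx_ports/asm_offsets.h"

#ifndef UNUSED
#define UNUSED __attribute__((unused))  /* assumption: GCC-style builds */
#endif

typedef struct { short *qcoeff; short *dqcoeff; char *eob; } BLOCKD;

BEGIN
DEFINE(blockd_qcoeff,  offsetof(BLOCKD, qcoeff));
DEFINE(blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
DEFINE(blockd_eob,     offsetof(BLOCKD, eob));
END

/* Compile-time guard: the asm assumes both pointer fields share a size. */
ct_assert(ptr_sizes_match,
          sizeof(((BLOCKD *)0)->qcoeff) == sizeof(((BLOCKD *)0)->dqcoeff))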