Merge remote branch 'internal/upstream' into HEAD
Conflicts: vp8/encoder/onyx_if.c Change-Id: Ieef9a58a2effdc68cf52bc5f14d90c31a1dbc13a
This commit is contained in:
@@ -19,14 +19,6 @@
|
||||
#include "vp8/common/idct.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
|
||||
|
||||
extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
|
||||
|
||||
void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
@@ -106,31 +98,12 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
||||
rtcd->recon.recon2 = vp8_recon2b_neon;
|
||||
rtcd->recon.recon4 = vp8_recon4b_neon;
|
||||
rtcd->recon.recon_mb = vp8_recon_mb_neon;
|
||||
|
||||
rtcd->recon.build_intra_predictors_mby =
|
||||
vp8_build_intra_predictors_mby_neon;
|
||||
rtcd->recon.build_intra_predictors_mby_s =
|
||||
vp8_build_intra_predictors_mby_s_neon;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV6
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
if (has_media)
|
||||
#endif
|
||||
{
|
||||
vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
|
||||
vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV7
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
if (has_neon)
|
||||
#endif
|
||||
{
|
||||
vp8_build_intra_predictors_mby_ptr =
|
||||
vp8_build_intra_predictors_mby_neon;
|
||||
vp8_build_intra_predictors_mby_s_ptr =
|
||||
vp8_build_intra_predictors_mby_s_neon;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@@ -53,6 +53,9 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);
|
||||
|
||||
extern prototype_recon_macroblock(vp8_recon_mb_neon);
|
||||
|
||||
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_neon);
|
||||
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_neon);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_recon_recon
|
||||
#define vp8_recon_recon vp8_recon_b_neon
|
||||
@@ -74,6 +77,13 @@ extern prototype_recon_macroblock(vp8_recon_mb_neon);
|
||||
|
||||
#undef vp8_recon_recon_mb
|
||||
#define vp8_recon_recon_mb vp8_recon_mb_neon
|
||||
|
||||
#undef vp8_recon_build_intra_predictors_mby
|
||||
#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_neon
|
||||
|
||||
#undef vp8_recon_build_intra_predictors_mby_s
|
||||
#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_neon
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@@ -20,12 +20,6 @@
|
||||
extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
|
||||
extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
|
||||
|
||||
void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
|
||||
|
||||
void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
|
||||
|
||||
void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
@@ -45,6 +39,10 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
rtcd->recon.recon4 = vp8_recon4b_c;
|
||||
rtcd->recon.recon_mb = vp8_recon_mb_c;
|
||||
rtcd->recon.recon_mby = vp8_recon_mby_c;
|
||||
rtcd->recon.build_intra_predictors_mby =
|
||||
vp8_build_intra_predictors_mby;
|
||||
rtcd->recon.build_intra_predictors_mby_s =
|
||||
vp8_build_intra_predictors_mby_s;
|
||||
|
||||
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c;
|
||||
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c;
|
||||
@@ -75,9 +73,6 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
/* Pure C: */
|
||||
vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
|
||||
vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
vp8_arch_x86_common_init(ctx);
|
||||
|
@@ -23,6 +23,9 @@
|
||||
#define prototype_recon_macroblock(sym) \
|
||||
void sym(const struct vp8_recon_rtcd_vtable *rtcd, MACROBLOCKD *x)
|
||||
|
||||
#define prototype_build_intra_predictors(sym) \
|
||||
void sym(MACROBLOCKD *x)
|
||||
|
||||
struct vp8_recon_rtcd_vtable;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
@@ -73,9 +76,23 @@ extern prototype_recon_macroblock(vp8_recon_recon_mb);
|
||||
#endif
|
||||
extern prototype_recon_macroblock(vp8_recon_recon_mby);
|
||||
|
||||
#ifndef vp8_recon_build_intra_predictors_mby
|
||||
#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby
|
||||
#endif
|
||||
extern prototype_build_intra_predictors\
|
||||
(vp8_recon_build_intra_predictors_mby);
|
||||
|
||||
#ifndef vp8_recon_build_intra_predictors_mby_s
|
||||
#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s
|
||||
#endif
|
||||
extern prototype_build_intra_predictors\
|
||||
(vp8_recon_build_intra_predictors_mby_s);
|
||||
|
||||
|
||||
typedef prototype_copy_block((*vp8_copy_block_fn_t));
|
||||
typedef prototype_recon_block((*vp8_recon_fn_t));
|
||||
typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t));
|
||||
typedef prototype_build_intra_predictors((*vp8_build_intra_pred_fn_t));
|
||||
typedef struct vp8_recon_rtcd_vtable
|
||||
{
|
||||
vp8_copy_block_fn_t copy16x16;
|
||||
@@ -86,6 +103,8 @@ typedef struct vp8_recon_rtcd_vtable
|
||||
vp8_recon_fn_t recon4;
|
||||
vp8_recon_mb_fn_t recon_mb;
|
||||
vp8_recon_mb_fn_t recon_mby;
|
||||
vp8_build_intra_pred_fn_t build_intra_predictors_mby_s;
|
||||
vp8_build_intra_pred_fn_t build_intra_predictors_mby;
|
||||
} vp8_recon_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@@ -14,13 +14,6 @@
|
||||
|
||||
extern void init_intra_left_above_pixels(MACROBLOCKD *x);
|
||||
|
||||
extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
|
||||
extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
|
||||
|
||||
extern void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x);
|
||||
|
||||
|
@@ -115,8 +115,8 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
{
|
||||
|
||||
vp8_build_intra_predictors_mbuv_s(xd);
|
||||
vp8_build_intra_predictors_mby_s_ptr(xd);
|
||||
|
||||
RECON_INVOKE(&pbi->common.rtcd.recon,
|
||||
build_intra_predictors_mby_s)(xd);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -214,7 +214,8 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
|
||||
if (xd->mode_info_context->mbmi.mode != B_PRED)
|
||||
{
|
||||
vp8_build_intra_predictors_mby_ptr(xd);
|
||||
RECON_INVOKE(&pbi->common.rtcd.recon,
|
||||
build_intra_predictors_mby)(xd);
|
||||
} else {
|
||||
vp8_intra_prediction_down_copy(xd);
|
||||
}
|
||||
|
@@ -71,8 +71,8 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.encodemb.submby = vp8_subtract_mby_c;
|
||||
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;*/
|
||||
|
||||
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/
|
||||
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;*/
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_armv6;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
224
vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
Normal file
224
vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
Normal file
@@ -0,0 +1,224 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_fast_quantize_b_armv6|
|
||||
|
||||
INCLUDE asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; vp8_fast_quantize_b_armv6
;
; Quantizes and dequantizes the 16 coefficients of one block and stores the
; resulting end-of-block position.
;
; Inputs:
;   r0  BLOCK *b   - coeff, quant_fast and round pointers are read from it
;   r1  BLOCKD *d  - qcoeff, dqcoeff and dequant pointers are read from it;
;                    the eob field is written on exit
;
; PART 1 handles four 16-bit coefficients per iteration (two packed words)
; and records in r1 which coefficient pairs quantized to a nonzero value.
; PART 2 uses those flags to find the last nonzero coefficient in scan
; order and writes (scan index + 1) to d->eob, or 0 if every coefficient
; quantized to zero.
; r0 BLOCK *b
|
||||
; r1 BLOCKD *d
|
||||
|vp8_fast_quantize_b_armv6| PROC
|
||||
; r1 is saved as well as the callee-saved registers: it is the lowest
; register in the list, so the BLOCKD pointer can be reloaded from
; [sp, #0] in PART 2 after r1 has been reused for the flags.
stmfd sp!, {r1, r4-r11, lr}
|
||||
|
||||
ldr r3, [r0, #vp8_block_coeff] ; coeff
|
||||
ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast
|
||||
ldr r5, [r0, #vp8_block_round] ; round
|
||||
ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff
|
||||
ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff
|
||||
ldr r8, [r1, #vp8_blockd_dequant] ; dequant
|
||||
|
||||
ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction
|
||||
; is used to update the counter so that
|
||||
; it can be used to mark nonzero
|
||||
; quantized coefficient pairs.
|
||||
|
||||
mov r1, #0 ; flags for quantized coeffs
|
||||
|
||||
; PART 1: quantization and dequantization loop
|
||||
; The counter in r2 is shifted left by 2 each pass and becomes zero after
; four passes, i.e. after 16 coefficients. r0 is used as scratch from here
; on, so the BLOCK pointer is no longer available.
loop
|
||||
ldr r9, [r3], #4 ; [z1 | z0]
|
||||
ldr r10, [r5], #4 ; [r1 | r0]
|
||||
ldr r11, [r4], #4 ; [q1 | q0]
|
||||
|
||||
ssat16 lr, #1, r9 ; [sz1 | sz0]
|
||||
eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0]
|
||||
ssub16 r9, r9, lr ; x = (z ^ sz) - sz
|
||||
sadd16 r9, r9, r10 ; [x1+r1 | x0+r0]
|
||||
|
||||
ldr r12, [r3], #4 ; [z3 | z2]
|
||||
|
||||
smulbb r0, r9, r11 ; [(x0+r0)*q0]
|
||||
smultt r9, r9, r11 ; [(x1+r1)*q1]
|
||||
|
||||
ldr r10, [r5], #4 ; [r3 | r2]
|
||||
|
||||
ssat16 r11, #1, r12 ; [sz3 | sz2]
|
||||
eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2]
|
||||
pkhtb r0, r9, r0, asr #16 ; [y1 | y0]
|
||||
ldr r9, [r4], #4 ; [q3 | q2]
|
||||
ssub16 r12, r12, r11 ; x = (z ^ sz) - sz
|
||||
|
||||
sadd16 r12, r12, r10 ; [x3+r3 | x2+r2]
|
||||
|
||||
eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)]
|
||||
|
||||
smulbb r10, r12, r9 ; [(x2+r2)*q2]
|
||||
smultt r12, r12, r9 ; [(x3+r3)*q3]
|
||||
|
||||
ssub16 r0, r0, lr ; x = (y ^ sz) - sz
|
||||
|
||||
; First pair of this pass: flag bit is (r2 >> 24), i.e. bit 0, 2, 4 or 6.
cmp r0, #0 ; check if zero
|
||||
orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs
|
||||
|
||||
str r0, [r6], #4 ; *qcoeff++ = x
|
||||
ldr r9, [r8], #4 ; [dq1 | dq0]
|
||||
|
||||
pkhtb r10, r12, r10, asr #16 ; [y3 | y2]
|
||||
eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)]
|
||||
ssub16 r10, r10, r11 ; x = (y ^ sz) - sz
|
||||
|
||||
; Second pair of this pass: flag bit is (r2 >> 23), i.e. bit 1, 3, 5 or 7.
; Overall, bit n of r1 marks the coefficient pair at raster positions
; 2n and 2n+1.
cmp r10, #0 ; check if zero
|
||||
orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs
|
||||
|
||||
str r10, [r6], #4 ; *qcoeff++ = x
|
||||
ldr r11, [r8], #4 ; [dq3 | dq2]
|
||||
|
||||
smulbb r12, r0, r9 ; [x0*dq0]
|
||||
smultt r0, r0, r9 ; [x1*dq1]
|
||||
|
||||
smulbb r9, r10, r11 ; [x2*dq2]
|
||||
smultt r10, r10, r11 ; [x3*dq3]
|
||||
|
||||
; The Z flag set by this lsls is what the bne below tests; the strh and
; add instructions in between do not update the flags.
lsls r2, r2, #2 ; update loop counter
|
||||
strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0]
|
||||
strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1]
|
||||
strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2]
|
||||
strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3]
|
||||
add r7, r7, #8 ; dqcoeff += 8
|
||||
bne loop
|
||||
|
||||
; PART 2: check position for eob...
|
||||
; lr holds the eob candidate. With no flags set, eob stays 0 and the scan
; below is skipped entirely.
mov lr, #0 ; init eob
|
||||
cmp r1, #0 ; coeffs after quantization?
|
||||
ldr r11, [sp, #0] ; restore BLOCKD pointer
|
||||
beq end ; skip eob calculations if all zero
|
||||
|
||||
ldr r0, [r11, #vp8_blockd_qcoeff]
|
||||
|
||||
; Each test below checks one pair flag and jumps into the descending
; scan-order chain at the latest scan position that pair can occupy.
; The labelled sections fall through into one another, so the chain keeps
; checking earlier scan positions until a nonzero coefficient is found.
; check shortcut for nonzero qcoeffs
|
||||
tst r1, #0x80
|
||||
bne quant_coeff_15_14
|
||||
tst r1, #0x20
|
||||
bne quant_coeff_13_11
|
||||
tst r1, #0x8
|
||||
bne quant_coeff_12_7
|
||||
tst r1, #0x40
|
||||
bne quant_coeff_10_9
|
||||
tst r1, #0x10
|
||||
bne quant_coeff_8_3
|
||||
tst r1, #0x2
|
||||
bne quant_coeff_6_5
|
||||
tst r1, #0x4
|
||||
bne quant_coeff_4_2
|
||||
b quant_coeff_1_0
|
||||
|
||||
; In the comments below, rc is the raster index of the coefficient (the
; load offset is rc * 2 bytes) and i is its scan position; lr is set to
; i + 1 before each test.
quant_coeff_15_14
|
||||
ldrh r2, [r0, #30] ; rc=15, i=15
|
||||
mov lr, #16
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
ldrh r3, [r0, #28] ; rc=14, i=14
|
||||
mov lr, #15
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_13_11
|
||||
ldrh r2, [r0, #22] ; rc=11, i=13
|
||||
mov lr, #14
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_12_7
|
||||
ldrh r3, [r0, #14] ; rc=7, i=12
|
||||
mov lr, #13
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #20] ; rc=10, i=11
|
||||
mov lr, #12
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_10_9
|
||||
ldrh r3, [r0, #26] ; rc=13, i=10
|
||||
mov lr, #11
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #24] ; rc=12, i=9
|
||||
mov lr, #10
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_8_3
|
||||
ldrh r3, [r0, #18] ; rc=9, i=8
|
||||
mov lr, #9
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #12] ; rc=6, i=7
|
||||
mov lr, #8
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_6_5
|
||||
ldrh r3, [r0, #6] ; rc=3, i=6
|
||||
mov lr, #7
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #4] ; rc=2, i=5
|
||||
mov lr, #6
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_4_2
|
||||
ldrh r3, [r0, #10] ; rc=5, i=4
|
||||
mov lr, #5
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #16] ; rc=8, i=3
|
||||
mov lr, #4
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
ldrh r3, [r0, #8] ; rc=4, i=2
|
||||
mov lr, #3
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_1_0
|
||||
ldrh r2, [r0, #2] ; rc=1, i=1
|
||||
mov lr, #2
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
; Reached only when some flag was set but every later scan position was
; zero, so coefficient 0 is taken as the last nonzero one without a load.
mov lr, #1 ; rc=0, i=0
|
||||
|
||||
end
|
||||
; Store the eob and return by popping the saved lr into pc; the saved r1
; slot is popped back into r1.
str lr, [r11, #vp8_blockd_eob]
|
||||
ldmfd sp!, {r1, r4-r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
loop_count
|
||||
DCD 0x1000000
|
||||
|
||||
END
|
||||
|
@@ -12,6 +12,16 @@
|
||||
#ifndef QUANTIZE_ARM_H
|
||||
#define QUANTIZE_ARM_H
|
||||
|
||||
#if HAVE_ARMV6
|
||||
|
||||
extern prototype_quantize_block(vp8_fast_quantize_b_armv6);
|
||||
|
||||
#undef vp8_quantize_fastquantb
|
||||
#define vp8_quantize_fastquantb vp8_fast_quantize_b_armv6
|
||||
|
||||
#endif /* HAVE_ARMV6 */
|
||||
|
||||
|
||||
#if HAVE_ARMV7
|
||||
extern prototype_quantize_block(vp8_fast_quantize_b_neon);
|
||||
|
||||
|
@@ -65,6 +65,17 @@ DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST));
|
||||
|
||||
DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
|
||||
|
||||
// offsets from BLOCK structure
|
||||
DEFINE(vp8_block_coeff, offsetof(BLOCK, coeff));
|
||||
DEFINE(vp8_block_quant_fast, offsetof(BLOCK, quant_fast));
|
||||
DEFINE(vp8_block_round, offsetof(BLOCK, round));
|
||||
|
||||
// offsets from BLOCKD structure
|
||||
DEFINE(vp8_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
|
||||
DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
|
||||
DEFINE(vp8_blockd_dequant, offsetof(BLOCKD, dequant));
|
||||
DEFINE(vp8_blockd_eob, offsetof(BLOCKD, eob));
|
||||
|
||||
// These two sizes are used in vp8cx_pack_tokens. They are hard coded
|
||||
// so if the size changes this will have to be adjusted.
|
||||
#if HAVE_ARMV5TE
|
||||
|
@@ -1322,7 +1322,8 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
|
||||
int distortion2;
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.mode = mode;
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
|
||||
(&x->e_mbd);
|
||||
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
|
||||
rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode];
|
||||
this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
|
||||
|
@@ -80,7 +80,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
{
|
||||
int b;
|
||||
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd);
|
||||
|
||||
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
|
||||
|
||||
|
@@ -24,6 +24,35 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
|
||||
extern void vp8_build_block_offsets(MACROBLOCK *x);
|
||||
extern void vp8_setup_block_ptrs(MACROBLOCK *x);
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
|
||||
extern void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
|
||||
|
||||
/* Entry point for the encoder's loop-filter worker thread.
 *
 * p_data points to an LPFTHREAD_DATA whose ptr1 member holds the VP8_COMP
 * instance. The thread repeatedly waits on h_event_start_lpf, runs
 * loopfilter_frame() for the current frame, and then posts h_event_end_lpf.
 * It leaves the loop once cpi->b_multi_threaded is seen as zero and
 * returns 0.
 */
static THREAD_FUNCTION loopfilter_thread(void *p_data)
|
||||
{
|
||||
VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1);
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
|
||||
while (1)
|
||||
{
|
||||
/* Checked before blocking so the thread does not wait again once
 * multithreading has been turned off. */
if (cpi->b_multi_threaded == 0)
|
||||
break;
|
||||
|
||||
/* Only act on a successful wait; on failure, loop and try again. */
if (sem_wait(&cpi->h_event_start_lpf) == 0)
|
||||
{
|
||||
/* Re-check after waking: the semaphore is also posted to wake this
 * thread for shutdown (NOTE(review): presumably b_multi_threaded is
 * cleared before that post -- confirm in
 * vp8cx_remove_encoder_threads). */
if (cpi->b_multi_threaded == FALSE) // we're shutting down
|
||||
break;
|
||||
|
||||
loopfilter_frame(cpi, cm);
|
||||
|
||||
/* Signal that loopfilter_frame() has returned for this frame. */
sem_post(&cpi->h_event_end_lpf);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
{
|
||||
@@ -479,6 +508,15 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
|
||||
pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd);
|
||||
}
|
||||
|
||||
{
|
||||
LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data;
|
||||
|
||||
sem_init(&cpi->h_event_start_lpf, 0, 0);
|
||||
sem_init(&cpi->h_event_end_lpf, 0, 0);
|
||||
|
||||
lpfthd->ptr1 = (void *)cpi;
|
||||
pthread_create(&cpi->h_filter_thread, 0, loopfilter_thread, lpfthd);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -500,9 +538,14 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
|
||||
|
||||
sem_destroy(&cpi->h_event_start_encoding[i]);
|
||||
}
|
||||
|
||||
sem_post(&cpi->h_event_start_lpf);
|
||||
pthread_join(cpi->h_filter_thread, 0);
|
||||
}
|
||||
|
||||
sem_destroy(&cpi->h_event_end_encoding);
|
||||
sem_destroy(&cpi->h_event_end_lpf);
|
||||
sem_destroy(&cpi->h_event_start_lpf);
|
||||
|
||||
//free thread related resources
|
||||
vpx_free(cpi->h_event_start_encoding);
|
||||
|
@@ -841,10 +841,10 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
extern const int vp8_bits_per_mb[2][QINDEX_RANGE];
|
||||
|
||||
#define BASE_ERRPERMB 150
|
||||
static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, int Height, int Width)
|
||||
static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh)
|
||||
{
|
||||
int Q;
|
||||
int num_mbs = ((Height * Width) / (16 * 16));
|
||||
int num_mbs = cpi->common.MBs;
|
||||
int target_norm_bits_per_mb;
|
||||
|
||||
double err_per_mb = section_err / num_mbs;
|
||||
@@ -941,10 +941,10 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_
|
||||
|
||||
return Q;
|
||||
}
|
||||
static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, int Height, int Width)
|
||||
static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh)
|
||||
{
|
||||
int Q;
|
||||
int num_mbs = ((Height * Width) / (16 * 16));
|
||||
int num_mbs = cpi->common.MBs;
|
||||
int target_norm_bits_per_mb;
|
||||
|
||||
double err_per_mb = section_err / num_mbs;
|
||||
@@ -992,10 +992,10 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band
|
||||
}
|
||||
|
||||
// Estimate a worst case Q for a KF group
|
||||
static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, int Height, int Width, double group_iiratio)
|
||||
static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, double group_iiratio)
|
||||
{
|
||||
int Q;
|
||||
int num_mbs = ((Height * Width) / (16 * 16));
|
||||
int num_mbs = cpi->common.MBs;
|
||||
int target_norm_bits_per_mb = (512 * section_target_bandwitdh) / num_mbs;
|
||||
int bits_per_mb_at_this_q;
|
||||
|
||||
@@ -1090,11 +1090,10 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
|
||||
|
||||
// For cq mode estimate a cq level that matches the observed
|
||||
// complexity and data rate.
|
||||
static int estimate_cq(VP8_COMP *cpi, double section_err,
|
||||
int section_target_bandwitdh, int Height, int Width)
|
||||
static int estimate_cq(VP8_COMP *cpi, double section_err, int section_target_bandwitdh)
|
||||
{
|
||||
int Q;
|
||||
int num_mbs = ((Height * Width) / (16 * 16));
|
||||
int num_mbs = cpi->common.MBs;
|
||||
int target_norm_bits_per_mb;
|
||||
|
||||
double err_per_mb = section_err / num_mbs;
|
||||
@@ -1608,7 +1607,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
arf_frame_bits = (int)((double)Boost * (group_bits / (double)allocation_chunks));
|
||||
|
||||
// Estimate if there are enough bits available to make worthwhile use of an arf.
|
||||
tmp_q = estimate_q(cpi, mod_frame_err, (int)arf_frame_bits, cpi->common.Height, cpi->common.Width);
|
||||
tmp_q = estimate_q(cpi, mod_frame_err, (int)arf_frame_bits);
|
||||
|
||||
// Only use an arf if it is likely we will be able to code it at a lower Q than the surrounding frames.
|
||||
if (tmp_q < cpi->worst_quality)
|
||||
@@ -2112,8 +2111,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
est_cq =
|
||||
estimate_cq( cpi,
|
||||
(cpi->total_coded_error_left / frames_left),
|
||||
(int)(cpi->bits_left / frames_left),
|
||||
cpi->common.Height, cpi->common.Width);
|
||||
(int)(cpi->bits_left / frames_left));
|
||||
|
||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
||||
if ( est_cq > cpi->cq_target_quality )
|
||||
@@ -2125,9 +2123,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
cpi->maxq_min_limit = cpi->best_quality;
|
||||
tmp_q = estimate_max_q( cpi,
|
||||
(cpi->total_coded_error_left / frames_left),
|
||||
(int)(cpi->bits_left / frames_left),
|
||||
cpi->common.Height,
|
||||
cpi->common.Width);
|
||||
(int)(cpi->bits_left / frames_left));
|
||||
|
||||
// Limit the maxq value returned subsequently.
|
||||
// This increases the risk of overspend or underspend if the initial
|
||||
@@ -2155,7 +2151,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
if (frames_left < 1)
|
||||
frames_left = 1;
|
||||
|
||||
tmp_q = estimate_max_q(cpi, (cpi->total_coded_error_left / frames_left), (int)(cpi->bits_left / frames_left), cpi->common.Height, cpi->common.Width);
|
||||
tmp_q = estimate_max_q(cpi, (cpi->total_coded_error_left / frames_left), (int)(cpi->bits_left / frames_left));
|
||||
|
||||
// Move active_worst_quality but in a damped way
|
||||
if (tmp_q > cpi->active_worst_quality)
|
||||
@@ -2764,7 +2760,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
bits_per_frame = (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
|
||||
|
||||
// Work out if spatial resampling is necessary
|
||||
kf_q = estimate_kf_group_q(cpi, err_per_frame, bits_per_frame, new_height, new_width, group_iiratio);
|
||||
kf_q = estimate_kf_group_q(cpi, err_per_frame, bits_per_frame, group_iiratio);
|
||||
|
||||
// If we project a required Q higher than the maximum allowed Q then make a guess at the actual size of frames in this section
|
||||
projected_bits_perframe = bits_per_frame;
|
||||
@@ -2835,7 +2831,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
effective_size_ratio = (1.0 + (3.0 * effective_size_ratio)) / 4.0;
|
||||
|
||||
// Now try again and see what Q we get with the smaller image size
|
||||
kf_q = estimate_kf_group_q(cpi, err_per_frame * effective_size_ratio, bits_per_frame, new_height, new_width, group_iiratio);
|
||||
kf_q = estimate_kf_group_q(cpi, err_per_frame * effective_size_ratio, bits_per_frame, group_iiratio);
|
||||
|
||||
if (0)
|
||||
{
|
||||
|
@@ -103,6 +103,10 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
|
||||
// Pure C:
|
||||
vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
|
||||
|
||||
#if CONFIG_PSNR
|
||||
cpi->rtcd.variance.ssimpf_8x8 = ssim_parms_8x8_c;
|
||||
cpi->rtcd.variance.ssimpf = ssim_parms_c;
|
||||
#endif
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
vp8_arch_x86_encoder_init(cpi);
|
||||
|
@@ -1415,7 +1415,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
int col_min = ref_col - distance;
|
||||
int col_max = ref_col + distance;
|
||||
|
||||
unsigned short sad_array8[8];
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
|
||||
unsigned int sad_array[3];
|
||||
|
||||
// Work out the mid point for the search
|
||||
|
@@ -86,9 +86,11 @@ extern double vp8_calc_ssim
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *dest,
|
||||
int lumamask,
|
||||
double *weight
|
||||
double *weight,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd
|
||||
);
|
||||
|
||||
|
||||
extern double vp8_calc_ssimg
|
||||
(
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
@@ -1522,8 +1524,7 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
VP8_COMP *cpi = (VP8_COMP *)(ptr);
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
|
||||
if (!cpi)
|
||||
return;
|
||||
cpi->oxcf = *oxcf;
|
||||
|
||||
cpi->auto_gold = 1;
|
||||
cpi->auto_adjust_gold_quantizer = 1;
|
||||
@@ -1535,50 +1536,15 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cm->version = oxcf->Version;
|
||||
vp8_setup_version(cm);
|
||||
|
||||
if (oxcf == 0)
|
||||
{
|
||||
cpi->pass = 0;
|
||||
// change includes all joint functionality
|
||||
vp8_change_config(ptr, oxcf);
|
||||
|
||||
cpi->auto_worst_q = 0;
|
||||
cpi->oxcf.best_allowed_q = MINQ;
|
||||
cpi->oxcf.worst_allowed_q = MAXQ;
|
||||
cpi->oxcf.cq_level = MINQ;
|
||||
// Initialize active best and worst q and average q values.
|
||||
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
||||
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
|
||||
|
||||
cpi->oxcf.end_usage = USAGE_STREAM_FROM_SERVER;
|
||||
cpi->oxcf.starting_buffer_level = 4000;
|
||||
cpi->oxcf.optimal_buffer_level = 5000;
|
||||
cpi->oxcf.maximum_buffer_size = 6000;
|
||||
cpi->oxcf.under_shoot_pct = 90;
|
||||
cpi->oxcf.allow_df = 0;
|
||||
cpi->oxcf.drop_frames_water_mark = 20;
|
||||
|
||||
cpi->oxcf.allow_spatial_resampling = 0;
|
||||
cpi->oxcf.resample_down_water_mark = 40;
|
||||
cpi->oxcf.resample_up_water_mark = 60;
|
||||
|
||||
cpi->oxcf.fixed_q = cpi->interquantizer;
|
||||
|
||||
cpi->filter_type = NORMAL_LOOPFILTER;
|
||||
|
||||
if (cm->simpler_lpf)
|
||||
cpi->filter_type = SIMPLE_LOOPFILTER;
|
||||
|
||||
cpi->compressor_speed = 1;
|
||||
cpi->horiz_scale = 0;
|
||||
cpi->vert_scale = 0;
|
||||
cpi->oxcf.two_pass_vbrbias = 50;
|
||||
cpi->oxcf.two_pass_vbrmax_section = 400;
|
||||
cpi->oxcf.two_pass_vbrmin_section = 0;
|
||||
|
||||
cpi->oxcf.Sharpness = 0;
|
||||
cpi->oxcf.noise_sensitivity = 0;
|
||||
}
|
||||
else
|
||||
cpi->oxcf = *oxcf;
|
||||
|
||||
|
||||
// Convert target bandwidth from Kbit/s to Bit/s
|
||||
cpi->oxcf.target_bandwidth *= 1000;
|
||||
// Initialise the starting buffer levels
|
||||
cpi->oxcf.starting_buffer_level =
|
||||
rescale(cpi->oxcf.starting_buffer_level,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
@@ -1586,10 +1552,6 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
|
||||
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
|
||||
|
||||
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
||||
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
|
||||
|
||||
cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||
@@ -1598,11 +1560,7 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cpi->total_actual_bits = 0;
|
||||
cpi->total_target_vs_actual = 0;
|
||||
|
||||
// change includes all joint functionality
|
||||
vp8_change_config(ptr, oxcf);
|
||||
|
||||
#if VP8_TEMPORAL_ALT_REF
|
||||
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -1726,7 +1684,8 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
|
||||
}
|
||||
|
||||
cpi->baseline_gf_interval = cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL;
|
||||
cpi->baseline_gf_interval =
|
||||
cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL;
|
||||
|
||||
cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
|
||||
|
||||
@@ -1737,7 +1696,8 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cm->refresh_entropy_probs = 1;
|
||||
|
||||
if (cpi->oxcf.token_partitions >= 0 && cpi->oxcf.token_partitions <= 3)
|
||||
cm->multi_token_partition = (TOKEN_PARTITION) cpi->oxcf.token_partitions;
|
||||
cm->multi_token_partition =
|
||||
(TOKEN_PARTITION) cpi->oxcf.token_partitions;
|
||||
|
||||
setup_features(cpi);
|
||||
|
||||
@@ -1758,12 +1718,12 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cpi->oxcf.starting_buffer_level = 60000;
|
||||
cpi->oxcf.optimal_buffer_level = 60000;
|
||||
cpi->oxcf.maximum_buffer_size = 240000;
|
||||
|
||||
}
|
||||
|
||||
// Convert target bandwidth from Kbit/s to Bit/s
|
||||
cpi->oxcf.target_bandwidth *= 1000;
|
||||
|
||||
// Set or reset optimal and maximum buffer levels.
|
||||
if (cpi->oxcf.optimal_buffer_level == 0)
|
||||
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
|
||||
else
|
||||
@@ -1778,7 +1738,10 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
rescale(cpi->oxcf.maximum_buffer_size,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
// Set up frame rate and related parameters rate control values.
|
||||
vp8_new_frame_rate(cpi, cpi->oxcf.frame_rate);
|
||||
|
||||
// Set absolute upper and lower quality limits
|
||||
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->best_quality = cpi->oxcf.best_allowed_q;
|
||||
|
||||
@@ -1807,9 +1770,9 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
||||
|
||||
// Only allow dropped frames in buffered mode
|
||||
cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode;
|
||||
cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode;
|
||||
|
||||
cm->filter_type = (LOOPFILTERTYPE) cpi->filter_type;
|
||||
cm->filter_type = (LOOPFILTERTYPE) cpi->filter_type;
|
||||
|
||||
if (!cm->use_bilinear_mc_filter)
|
||||
cm->mcomp_filter_type = SIXTAP;
|
||||
@@ -1824,7 +1787,8 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cm->horiz_scale = cpi->horiz_scale;
|
||||
cm->vert_scale = cpi->vert_scale ;
|
||||
|
||||
cpi->intra_frame_target = (4 * (cm->Width + cm->Height) / 15) * 1000; // As per VP8
|
||||
// As per VP8
|
||||
cpi->intra_frame_target = (4 * (cm->Width + cm->Height) / 15) * 1000;
|
||||
|
||||
// VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
|
||||
if (cpi->oxcf.Sharpness > 7)
|
||||
@@ -1845,8 +1809,10 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
|
||||
}
|
||||
|
||||
if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
|
||||
((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
|
||||
if (((cm->Width + 15) & 0xfffffff0) !=
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_width ||
|
||||
((cm->Height + 15) & 0xfffffff0) !=
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_height ||
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
|
||||
{
|
||||
alloc_raw_frame_buffers(cpi);
|
||||
@@ -3340,6 +3306,89 @@ static BOOL recode_loop_test( VP8_COMP *cpi,
|
||||
return force_recode;
|
||||
}
|
||||
|
||||
/*
 * Choose the loop filter level for the frame that has just been encoded,
 * run the loop filter over cm->frame_to_show, extend the frame borders and
 * finally carry out any reference buffer copies signalled for this frame.
 *
 *   cpi  encoder instance; supplies the source frame, speed features,
 *        timing accumulator and (when built with CONFIG_MULTITHREAD) the
 *        semaphore used to report that the filter level is known.
 *   cm   common codec state; filter_level is written here and the
 *        reference buffers in cm->yv12_fb[] are updated in place.
 */
void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
{
    if (cm->no_lpf)
    {
        cm->filter_level = 0;
    }
    else
    {
        struct vpx_usec_timer timer;

        vp8_clear_system_state();

        // Time how long picking the filter level takes.
        vpx_usec_timer_start(&timer);

        if (cpi->sf.auto_filter == 0)
            vp8cx_pick_filter_level_fast(cpi->Source, cpi);
        else
            vp8cx_pick_filter_level(cpi->Source, cpi);

        vpx_usec_timer_mark(&timer);
        cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
    }

#if CONFIG_MULTITHREAD
    // The encoder only waits on h_event_end_lpf when b_multi_threaded is
    // set, so only post in that case; otherwise every frame would leave an
    // unconsumed post behind on the semaphore.
    if (cpi->b_multi_threaded)
        sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */
#endif

    if (cm->filter_level > 0)
    {
        vp8cx_set_alt_lf_level(cpi, cm->filter_level);
        vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level);
        cm->last_filter_type = cm->filter_type;
        cm->last_sharpness_level = cm->sharpness_level;
    }

    vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);

    {
        YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
        YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
        YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
        YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx];

        // At this point the new frame has been encoded.
        // If any buffer copy / swapping is signaled it should be done here.
        if (cm->frame_type == KEY_FRAME)
        {
            // A key frame replaces both the golden and the alt-ref buffer.
            vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12);
            vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12);
        }
        else // For non key frames
        {
            // We copy new_frame when cm->refresh_last_frame is set because
            // last and new buffers will already have been swapped in that
            // case; otherwise the last frame buffer is the source.
            YV12_BUFFER_CONFIG *last_src =
                cm->refresh_last_frame ? new_yv12 : lst_yv12;

            // Code to copy between reference buffers
            if (cm->copy_buffer_to_arf == 1)
                vp8_yv12_copy_frame_ptr(last_src, alt_yv12);
            else if (cm->copy_buffer_to_arf == 2)
                vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12);

            if (cm->copy_buffer_to_gf == 1)
                vp8_yv12_copy_frame_ptr(last_src, gld_yv12);
            else if (cm->copy_buffer_to_gf == 2)
                vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12);
        }
    }
}
|
||||
|
||||
static void encode_frame_to_data_rate
|
||||
(
|
||||
VP8_COMP *cpi,
|
||||
@@ -3698,11 +3747,12 @@ static void encode_frame_to_data_rate
|
||||
}
|
||||
}
|
||||
|
||||
// If CBR and the buffer is as full then it is reasonable to allow higher quality on the frames
|
||||
// to prevent bits just going to waste.
|
||||
// If CBR and the buffer is as full then it is reasonable to allow
|
||||
// higher quality on the frames to prevent bits just going to waste.
|
||||
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
|
||||
{
|
||||
// Note that the use of >= here elliminates the risk of a devide by 0 error in the else if clause
|
||||
// Note that the use of >= here eliminates the risk of a divide
|
||||
// by 0 error in the else if clause
|
||||
if (cpi->buffer_level >= cpi->oxcf.maximum_buffer_size)
|
||||
cpi->active_best_quality = cpi->best_quality;
|
||||
|
||||
@@ -3715,6 +3765,20 @@ static void encode_frame_to_data_rate
|
||||
}
|
||||
}
|
||||
}
|
||||
// Make sure constrained quality mode limits are adhered to for the first
|
||||
// few frames of one pass encodes
|
||||
else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
|
||||
{
|
||||
if ( (cm->frame_type == KEY_FRAME) ||
|
||||
cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame )
|
||||
{
|
||||
cpi->active_best_quality = cpi->best_quality;
|
||||
}
|
||||
else if (cpi->active_best_quality < cpi->cq_target_quality)
|
||||
{
|
||||
cpi->active_best_quality = cpi->cq_target_quality;
|
||||
}
|
||||
}
|
||||
|
||||
// Clip the active best and worst quality values to limits
|
||||
if (cpi->active_worst_quality > cpi->worst_quality)
|
||||
@@ -3895,6 +3959,7 @@ static void encode_frame_to_data_rate
|
||||
|
||||
// transform / motion compensation build reconstruction frame
|
||||
vp8_encode_frame(cpi);
|
||||
|
||||
cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi);
|
||||
cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0;
|
||||
|
||||
@@ -4254,92 +4319,43 @@ static void encode_frame_to_data_rate
|
||||
else
|
||||
cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
|
||||
|
||||
if (cm->no_lpf)
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (cpi->b_multi_threaded)
|
||||
{
|
||||
cm->filter_level = 0;
|
||||
sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
struct vpx_usec_timer timer;
|
||||
|
||||
vpx_usec_timer_start(&timer);
|
||||
|
||||
if (cpi->sf.auto_filter == 0)
|
||||
vp8cx_pick_filter_level_fast(cpi->Source, cpi);
|
||||
else
|
||||
vp8cx_pick_filter_level(cpi->Source, cpi);
|
||||
|
||||
vpx_usec_timer_mark(&timer);
|
||||
|
||||
cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
|
||||
loopfilter_frame(cpi, cm);
|
||||
}
|
||||
|
||||
if (cm->filter_level > 0)
|
||||
{
|
||||
vp8cx_set_alt_lf_level(cpi, cm->filter_level);
|
||||
vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level);
|
||||
cm->last_filter_type = cm->filter_type;
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
}
|
||||
|
||||
/* Move storing frame_type out of the above loop since it is also
|
||||
* needed in motion search besides loopfilter */
|
||||
cm->last_frame_type = cm->frame_type;
|
||||
|
||||
vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
|
||||
|
||||
if (cpi->oxcf.error_resilient_mode == 1)
|
||||
{
|
||||
cm->refresh_entropy_probs = 0;
|
||||
}
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
/* wait that filter_level is picked so that we can continue with stream packing */
|
||||
if (cpi->b_multi_threaded)
|
||||
sem_wait(&cpi->h_event_end_lpf);
|
||||
#endif
|
||||
|
||||
// build the bitstream
|
||||
vp8_pack_bitstream(cpi, dest, size);
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
/* wait for loopfilter thread done */
|
||||
if (cpi->b_multi_threaded)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
|
||||
YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
|
||||
YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
|
||||
YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx];
|
||||
// At this point the new frame has been encoded coded.
|
||||
// If any buffer copy / swaping is signalled it should be done here.
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
{
|
||||
vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12);
|
||||
vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12);
|
||||
}
|
||||
else // For non key frames
|
||||
{
|
||||
// Code to copy between reference buffers
|
||||
if (cm->copy_buffer_to_arf)
|
||||
{
|
||||
if (cm->copy_buffer_to_arf == 1)
|
||||
{
|
||||
if (cm->refresh_last_frame)
|
||||
// We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
|
||||
vp8_yv12_copy_frame_ptr(new_yv12, alt_yv12);
|
||||
else
|
||||
vp8_yv12_copy_frame_ptr(lst_yv12, alt_yv12);
|
||||
}
|
||||
else if (cm->copy_buffer_to_arf == 2)
|
||||
vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12);
|
||||
}
|
||||
|
||||
if (cm->copy_buffer_to_gf)
|
||||
{
|
||||
if (cm->copy_buffer_to_gf == 1)
|
||||
{
|
||||
if (cm->refresh_last_frame)
|
||||
// We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
|
||||
vp8_yv12_copy_frame_ptr(new_yv12, gld_yv12);
|
||||
else
|
||||
vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
|
||||
}
|
||||
else if (cm->copy_buffer_to_gf == 2)
|
||||
vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12);
|
||||
}
|
||||
}
|
||||
sem_wait(&cpi->h_event_end_lpf);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Move storing frame_type out of the above loop since it is also
|
||||
* needed in motion search besides loopfilter */
|
||||
cm->last_frame_type = cm->frame_type;
|
||||
|
||||
// Update rate control heuristics
|
||||
cpi->total_byte_count += (*size);
|
||||
@@ -5179,7 +5195,9 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
|
||||
|
||||
if (cpi->b_calculate_psnr && cpi->pass != 1 && cm->show_frame)
|
||||
{
|
||||
generate_psnr_packet(cpi);
|
||||
}
|
||||
|
||||
#if CONFIG_PSNR
|
||||
|
||||
@@ -5195,12 +5213,35 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
if (cpi->b_calculate_psnr)
|
||||
{
|
||||
double y, u, v;
|
||||
double sq_error;
|
||||
double frame_psnr = vp8_calc_psnr(cpi->Source, cm->frame_to_show, &y, &u, &v, &sq_error);
|
||||
double ye,ue,ve;
|
||||
double frame_psnr;
|
||||
YV12_BUFFER_CONFIG *orig = cpi->Source;
|
||||
YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
|
||||
YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
|
||||
int y_samples = orig->y_height * orig->y_width ;
|
||||
int uv_samples = orig->uv_height * orig->uv_width ;
|
||||
int t_samples = y_samples + 2 * uv_samples;
|
||||
long long sq_error;
|
||||
|
||||
cpi->total_y += y;
|
||||
cpi->total_u += u;
|
||||
cpi->total_v += v;
|
||||
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
|
||||
recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
|
||||
recon->u_buffer, recon->uv_stride, orig->uv_width, orig->uv_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
|
||||
recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
sq_error = ye + ue + ve;
|
||||
|
||||
frame_psnr = vp8_mse2psnr(t_samples, 255.0, sq_error);
|
||||
|
||||
cpi->total_y += vp8_mse2psnr(y_samples, 255.0, ye);
|
||||
cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, ue);
|
||||
cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, ve);
|
||||
cpi->total_sq_error += sq_error;
|
||||
cpi->total += frame_psnr;
|
||||
{
|
||||
@@ -5209,17 +5250,35 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
|
||||
vp8_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0, IF_RTCD(&cm->rtcd.postproc));
|
||||
vp8_clear_system_state();
|
||||
frame_psnr2 = vp8_calc_psnr(cpi->Source, &cm->post_proc_buffer, &y2, &u2, &v2, &sq_error);
|
||||
frame_ssim2 = vp8_calc_ssim(cpi->Source, &cm->post_proc_buffer, 1, &weight);
|
||||
|
||||
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
|
||||
pp->y_buffer, pp->y_stride, orig->y_width, orig->y_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
|
||||
pp->u_buffer, pp->uv_stride, orig->uv_width, orig->uv_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
|
||||
pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
sq_error = ye + ue + ve;
|
||||
|
||||
frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error);
|
||||
|
||||
cpi->totalp_y += vp8_mse2psnr(y_samples, 255.0, ye);
|
||||
cpi->totalp_u += vp8_mse2psnr(uv_samples, 255.0, ue);
|
||||
cpi->totalp_v += vp8_mse2psnr(uv_samples, 255.0, ve);
|
||||
cpi->total_sq_error2 += sq_error;
|
||||
cpi->totalp += frame_psnr2;
|
||||
|
||||
frame_ssim2 = vp8_calc_ssim(cpi->Source,
|
||||
&cm->post_proc_buffer, 1, &weight,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
cpi->summed_quality += frame_ssim2 * weight;
|
||||
cpi->summed_weights += weight;
|
||||
|
||||
cpi->totalp_y += y2;
|
||||
cpi->totalp_u += u2;
|
||||
cpi->totalp_v += v2;
|
||||
cpi->totalp += frame_psnr2;
|
||||
cpi->total_sq_error2 += sq_error;
|
||||
#if 0
|
||||
{
|
||||
FILE *f = fopen("q_used.stt", "a");
|
||||
|
@@ -603,12 +603,17 @@ typedef struct
|
||||
int encoding_thread_count;
|
||||
|
||||
pthread_t *h_encoding_thread;
|
||||
pthread_t h_filter_thread;
|
||||
|
||||
MB_ROW_COMP *mb_row_ei;
|
||||
ENCODETHREAD_DATA *en_thread_data;
|
||||
LPFTHREAD_DATA lpf_thread_data;
|
||||
|
||||
//events
|
||||
sem_t *h_event_start_encoding;
|
||||
sem_t h_event_end_encoding;
|
||||
sem_t h_event_start_lpf;
|
||||
sem_t h_event_end_lpf;
|
||||
#endif
|
||||
|
||||
TOKENLIST *tplist;
|
||||
|
@@ -664,7 +664,8 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
case V_PRED:
|
||||
case H_PRED:
|
||||
case TM_PRED:
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
|
||||
(&x->e_mbd);
|
||||
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
|
||||
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
|
||||
this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
|
||||
|
@@ -887,7 +887,8 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
{
|
||||
int one_percent_bits = 1 + cpi->oxcf.optimal_buffer_level / 100;
|
||||
|
||||
if ((cpi->buffer_level < cpi->oxcf.optimal_buffer_level) || (cpi->bits_off_target < cpi->oxcf.optimal_buffer_level))
|
||||
if ((cpi->buffer_level < cpi->oxcf.optimal_buffer_level) ||
|
||||
(cpi->bits_off_target < cpi->oxcf.optimal_buffer_level))
|
||||
{
|
||||
int percent_low = 0;
|
||||
|
||||
@@ -896,9 +897,12 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
// If we are below the optimal buffer fullness level and adherence
|
||||
// to buffering constraints is important to the end usage then adjust
|
||||
// the per frame target.
|
||||
if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && (cpi->buffer_level < cpi->oxcf.optimal_buffer_level))
|
||||
if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
|
||||
(cpi->buffer_level < cpi->oxcf.optimal_buffer_level))
|
||||
{
|
||||
percent_low = (cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / one_percent_bits;
|
||||
percent_low =
|
||||
(cpi->oxcf.optimal_buffer_level - cpi->buffer_level) /
|
||||
one_percent_bits;
|
||||
|
||||
if (percent_low > 100)
|
||||
percent_low = 100;
|
||||
@@ -909,7 +913,8 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
else if (cpi->bits_off_target < 0)
|
||||
{
|
||||
// Adjust per frame data target downwards to compensate.
|
||||
percent_low = (int)(100 * -cpi->bits_off_target / (cpi->total_byte_count * 8));
|
||||
percent_low = (int)(100 * -cpi->bits_off_target /
|
||||
(cpi->total_byte_count * 8));
|
||||
|
||||
if (percent_low > 100)
|
||||
percent_low = 100;
|
||||
@@ -918,39 +923,60 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
// lower the target bandwidth for this frame.
|
||||
cpi->this_frame_target = (cpi->this_frame_target * (100 - (percent_low / 2))) / 100;
|
||||
cpi->this_frame_target =
|
||||
(cpi->this_frame_target * (100 - (percent_low / 2))) / 100;
|
||||
|
||||
// Are we using allowing control of active_worst_allowed_q according to buffer level.
|
||||
// Are we using allowing control of active_worst_allowed_q
|
||||
// according to buffer level.
|
||||
if (cpi->auto_worst_q)
|
||||
{
|
||||
int critical_buffer_level;
|
||||
|
||||
// For streaming applications the most important factor is cpi->buffer_level as this takes
|
||||
// into account the specified short term buffering constraints. However, hitting the long
|
||||
// term clip data rate target is also important.
|
||||
// For streaming applications the most important factor is
|
||||
// cpi->buffer_level as this takes into account the
|
||||
// specified short term buffering constraints. However,
|
||||
// hitting the long term clip data rate target is also
|
||||
// important.
|
||||
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
|
||||
{
|
||||
// Take the smaller of cpi->buffer_level and cpi->bits_off_target
|
||||
critical_buffer_level = (cpi->buffer_level < cpi->bits_off_target) ? cpi->buffer_level : cpi->bits_off_target;
|
||||
// Take the smaller of cpi->buffer_level and
|
||||
// cpi->bits_off_target
|
||||
critical_buffer_level =
|
||||
(cpi->buffer_level < cpi->bits_off_target)
|
||||
? cpi->buffer_level : cpi->bits_off_target;
|
||||
}
|
||||
// For local file playback short term buffering contraints are less of an issue
|
||||
// For local file playback short term buffering contraints
|
||||
// are less of an issue
|
||||
else
|
||||
{
|
||||
// Consider only how we are doing for the clip as a whole
|
||||
// Consider only how we are doing for the clip as a
|
||||
// whole
|
||||
critical_buffer_level = cpi->bits_off_target;
|
||||
}
|
||||
|
||||
// Set the active worst quality based upon the selected buffer fullness number.
|
||||
// Set the active worst quality based upon the selected
|
||||
// buffer fullness number.
|
||||
if (critical_buffer_level < cpi->oxcf.optimal_buffer_level)
|
||||
{
|
||||
if (critical_buffer_level > (cpi->oxcf.optimal_buffer_level / 4))
|
||||
if ( critical_buffer_level >
|
||||
(cpi->oxcf.optimal_buffer_level >> 2) )
|
||||
{
|
||||
int qadjustment_range = cpi->worst_quality - cpi->ni_av_qi;
|
||||
int above_base = (critical_buffer_level - (cpi->oxcf.optimal_buffer_level / 4));
|
||||
INT64 qadjustment_range =
|
||||
cpi->worst_quality - cpi->ni_av_qi;
|
||||
INT64 above_base =
|
||||
(critical_buffer_level -
|
||||
(cpi->oxcf.optimal_buffer_level >> 2));
|
||||
|
||||
// Step active worst quality down from cpi->ni_av_qi when (critical_buffer_level == cpi->optimal_buffer_level)
|
||||
// to cpi->oxcf.worst_allowed_q when (critical_buffer_level == cpi->optimal_buffer_level/4)
|
||||
cpi->active_worst_quality = cpi->worst_quality - ((qadjustment_range * above_base) / (cpi->oxcf.optimal_buffer_level * 3 / 4));
|
||||
// Step active worst quality down from
|
||||
// cpi->ni_av_qi when (critical_buffer_level ==
|
||||
// cpi->optimal_buffer_level) to
|
||||
// cpi->worst_quality when
|
||||
// (critical_buffer_level ==
|
||||
// cpi->optimal_buffer_level >> 2)
|
||||
cpi->active_worst_quality =
|
||||
cpi->worst_quality -
|
||||
((qadjustment_range * above_base) /
|
||||
(cpi->oxcf.optimal_buffer_level*3>>2));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1010,6 +1036,15 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
// Set the active worst quality
|
||||
cpi->active_worst_quality = cpi->worst_quality;
|
||||
}
|
||||
|
||||
// Special trap for constrained quality mode
|
||||
// "active_worst_quality" may never drop below cq level
|
||||
// for any frame type.
|
||||
if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
|
||||
cpi->active_worst_quality < cpi->cq_target_quality)
|
||||
{
|
||||
cpi->active_worst_quality = cpi->cq_target_quality;
|
||||
}
|
||||
}
|
||||
|
||||
// Test to see if we have to drop a frame
|
||||
|
@@ -806,7 +806,8 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
|
||||
{
|
||||
x->e_mbd.mode_info_context->mbmi.mode = mode;
|
||||
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
|
||||
(&x->e_mbd);
|
||||
|
||||
macro_block_yrd(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd.encodemb));
|
||||
rate = ratey + x->mbmode_cost[x->e_mbd.frame_type]
|
||||
@@ -2103,7 +2104,8 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
case H_PRED:
|
||||
case TM_PRED:
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
|
||||
(&x->e_mbd);
|
||||
macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ;
|
||||
rate2 += rate_y;
|
||||
distortion2 += distortion;
|
||||
|
@@ -11,298 +11,13 @@
|
||||
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "math.h"
|
||||
#include "onyx_int.h"
|
||||
|
||||
#define C1 (float)(64 * 64 * 0.01*255*0.01*255)
|
||||
#define C2 (float)(64 * 64 * 0.03*255*0.03*255)
|
||||
|
||||
static int width_y;
|
||||
static int height_y;
|
||||
static int height_uv;
|
||||
static int width_uv;
|
||||
static int stride_uv;
|
||||
static int stride;
|
||||
static int lumimask;
|
||||
static int luminance;
|
||||
static double plane_summed_weights = 0;
|
||||
|
||||
static short img12_sum_block[8*4096*4096*2] ;
|
||||
|
||||
static short img1_sum[8*4096*2];
|
||||
static short img2_sum[8*4096*2];
|
||||
static int img1_sq_sum[8*4096*2];
|
||||
static int img2_sq_sum[8*4096*2];
|
||||
static int img12_mul_sum[8*4096*2];
|
||||
|
||||
|
||||
double vp8_similarity
|
||||
(
|
||||
int mu_x,
|
||||
int mu_y,
|
||||
int pre_mu_x2,
|
||||
int pre_mu_y2,
|
||||
int pre_mu_xy2
|
||||
)
|
||||
{
|
||||
int mu_x2, mu_y2, mu_xy, theta_x2, theta_y2, theta_xy;
|
||||
|
||||
mu_x2 = mu_x * mu_x;
|
||||
mu_y2 = mu_y * mu_y;
|
||||
mu_xy = mu_x * mu_y;
|
||||
|
||||
theta_x2 = 64 * pre_mu_x2 - mu_x2;
|
||||
theta_y2 = 64 * pre_mu_y2 - mu_y2;
|
||||
theta_xy = 64 * pre_mu_xy2 - mu_xy;
|
||||
|
||||
return (2 * mu_xy + C1) * (2 * theta_xy + C2) / ((mu_x2 + mu_y2 + C1) * (theta_x2 + theta_y2 + C2));
|
||||
}
|
||||
|
||||
double vp8_ssim
|
||||
(
|
||||
const unsigned char *img1,
|
||||
const unsigned char *img2,
|
||||
int stride_img1,
|
||||
int stride_img2,
|
||||
int width,
|
||||
int height
|
||||
)
|
||||
{
|
||||
int x, y, x2, y2, img1_block, img2_block, img1_sq_block, img2_sq_block, img12_mul_block, temp;
|
||||
|
||||
double plane_quality, weight, mean;
|
||||
|
||||
short *img1_sum_ptr1, *img1_sum_ptr2;
|
||||
short *img2_sum_ptr1, *img2_sum_ptr2;
|
||||
int *img1_sq_sum_ptr1, *img1_sq_sum_ptr2;
|
||||
int *img2_sq_sum_ptr1, *img2_sq_sum_ptr2;
|
||||
int *img12_mul_sum_ptr1, *img12_mul_sum_ptr2;
|
||||
|
||||
plane_quality = 0;
|
||||
|
||||
if (lumimask)
|
||||
plane_summed_weights = 0.0f;
|
||||
else
|
||||
plane_summed_weights = (height - 7) * (width - 7);
|
||||
|
||||
//some prologue for the main loop
|
||||
temp = 8 * width;
|
||||
|
||||
img1_sum_ptr1 = img1_sum + temp;
|
||||
img2_sum_ptr1 = img2_sum + temp;
|
||||
img1_sq_sum_ptr1 = img1_sq_sum + temp;
|
||||
img2_sq_sum_ptr1 = img2_sq_sum + temp;
|
||||
img12_mul_sum_ptr1 = img12_mul_sum + temp;
|
||||
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
img1_sum[x] = img1[x];
|
||||
img2_sum[x] = img2[x];
|
||||
img1_sq_sum[x] = img1[x] * img1[x];
|
||||
img2_sq_sum[x] = img2[x] * img2[x];
|
||||
img12_mul_sum[x] = img1[x] * img2[x];
|
||||
|
||||
img1_sum_ptr1[x] = 0;
|
||||
img2_sum_ptr1[x] = 0;
|
||||
img1_sq_sum_ptr1[x] = 0;
|
||||
img2_sq_sum_ptr1[x] = 0;
|
||||
img12_mul_sum_ptr1[x] = 0;
|
||||
}
|
||||
|
||||
//the main loop
|
||||
for (y = 1; y < height; y++)
|
||||
{
|
||||
img1 += stride_img1;
|
||||
img2 += stride_img2;
|
||||
|
||||
temp = (y - 1) % 9 * width;
|
||||
|
||||
img1_sum_ptr1 = img1_sum + temp;
|
||||
img2_sum_ptr1 = img2_sum + temp;
|
||||
img1_sq_sum_ptr1 = img1_sq_sum + temp;
|
||||
img2_sq_sum_ptr1 = img2_sq_sum + temp;
|
||||
img12_mul_sum_ptr1 = img12_mul_sum + temp;
|
||||
|
||||
temp = y % 9 * width;
|
||||
|
||||
img1_sum_ptr2 = img1_sum + temp;
|
||||
img2_sum_ptr2 = img2_sum + temp;
|
||||
img1_sq_sum_ptr2 = img1_sq_sum + temp;
|
||||
img2_sq_sum_ptr2 = img2_sq_sum + temp;
|
||||
img12_mul_sum_ptr2 = img12_mul_sum + temp;
|
||||
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
img1_sum_ptr2[x] = img1_sum_ptr1[x] + img1[x];
|
||||
img2_sum_ptr2[x] = img2_sum_ptr1[x] + img2[x];
|
||||
img1_sq_sum_ptr2[x] = img1_sq_sum_ptr1[x] + img1[x] * img1[x];
|
||||
img2_sq_sum_ptr2[x] = img2_sq_sum_ptr1[x] + img2[x] * img2[x];
|
||||
img12_mul_sum_ptr2[x] = img12_mul_sum_ptr1[x] + img1[x] * img2[x];
|
||||
}
|
||||
|
||||
if (y > 6)
|
||||
{
|
||||
//calculate the sum of the last 8 lines by subtracting the total sum of 8 lines back from the present sum
|
||||
temp = (y + 1) % 9 * width;
|
||||
|
||||
img1_sum_ptr1 = img1_sum + temp;
|
||||
img2_sum_ptr1 = img2_sum + temp;
|
||||
img1_sq_sum_ptr1 = img1_sq_sum + temp;
|
||||
img2_sq_sum_ptr1 = img2_sq_sum + temp;
|
||||
img12_mul_sum_ptr1 = img12_mul_sum + temp;
|
||||
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
img1_sum_ptr1[x] = img1_sum_ptr2[x] - img1_sum_ptr1[x];
|
||||
img2_sum_ptr1[x] = img2_sum_ptr2[x] - img2_sum_ptr1[x];
|
||||
img1_sq_sum_ptr1[x] = img1_sq_sum_ptr2[x] - img1_sq_sum_ptr1[x];
|
||||
img2_sq_sum_ptr1[x] = img2_sq_sum_ptr2[x] - img2_sq_sum_ptr1[x];
|
||||
img12_mul_sum_ptr1[x] = img12_mul_sum_ptr2[x] - img12_mul_sum_ptr1[x];
|
||||
}
|
||||
|
||||
//here we calculate the sum over the 8x8 block of pixels
|
||||
//this is done by sliding a window across the column sums for the last 8 lines
|
||||
//each time adding the new column sum, and subtracting the one which fell out of the window
|
||||
img1_block = 0;
|
||||
img2_block = 0;
|
||||
img1_sq_block = 0;
|
||||
img2_sq_block = 0;
|
||||
img12_mul_block = 0;
|
||||
|
||||
//prologue, and calculation of simularity measure from the first 8 column sums
|
||||
for (x = 0; x < 8; x++)
|
||||
{
|
||||
img1_block += img1_sum_ptr1[x];
|
||||
img2_block += img2_sum_ptr1[x];
|
||||
img1_sq_block += img1_sq_sum_ptr1[x];
|
||||
img2_sq_block += img2_sq_sum_ptr1[x];
|
||||
img12_mul_block += img12_mul_sum_ptr1[x];
|
||||
}
|
||||
|
||||
if (lumimask)
|
||||
{
|
||||
y2 = y - 7;
|
||||
x2 = 0;
|
||||
|
||||
if (luminance)
|
||||
{
|
||||
mean = (img2_block + img1_block) / 128.0f;
|
||||
|
||||
if (!(y2 % 2 || x2 % 2))
|
||||
*(img12_sum_block + y2 / 2 * width_uv + x2 / 2) = img2_block + img1_block;
|
||||
}
|
||||
else
|
||||
{
|
||||
mean = *(img12_sum_block + y2 * width_uv + x2);
|
||||
mean += *(img12_sum_block + y2 * width_uv + x2 + 4);
|
||||
mean += *(img12_sum_block + (y2 + 4) * width_uv + x2);
|
||||
mean += *(img12_sum_block + (y2 + 4) * width_uv + x2 + 4);
|
||||
|
||||
mean /= 512.0f;
|
||||
}
|
||||
|
||||
weight = mean < 40 ? 0.0f :
|
||||
(mean < 50 ? (mean - 40.0f) / 10.0f : 1.0f);
|
||||
plane_summed_weights += weight;
|
||||
|
||||
plane_quality += weight * vp8_similarity(img1_block, img2_block, img1_sq_block, img2_sq_block, img12_mul_block);
|
||||
}
|
||||
else
|
||||
plane_quality += vp8_similarity(img1_block, img2_block, img1_sq_block, img2_sq_block, img12_mul_block);
|
||||
|
||||
//and for the rest
|
||||
for (x = 8; x < width; x++)
|
||||
{
|
||||
img1_block = img1_block + img1_sum_ptr1[x] - img1_sum_ptr1[x - 8];
|
||||
img2_block = img2_block + img2_sum_ptr1[x] - img2_sum_ptr1[x - 8];
|
||||
img1_sq_block = img1_sq_block + img1_sq_sum_ptr1[x] - img1_sq_sum_ptr1[x - 8];
|
||||
img2_sq_block = img2_sq_block + img2_sq_sum_ptr1[x] - img2_sq_sum_ptr1[x - 8];
|
||||
img12_mul_block = img12_mul_block + img12_mul_sum_ptr1[x] - img12_mul_sum_ptr1[x - 8];
|
||||
|
||||
if (lumimask)
|
||||
{
|
||||
y2 = y - 7;
|
||||
x2 = x - 7;
|
||||
|
||||
if (luminance)
|
||||
{
|
||||
mean = (img2_block + img1_block) / 128.0f;
|
||||
|
||||
if (!(y2 % 2 || x2 % 2))
|
||||
*(img12_sum_block + y2 / 2 * width_uv + x2 / 2) = img2_block + img1_block;
|
||||
}
|
||||
else
|
||||
{
|
||||
mean = *(img12_sum_block + y2 * width_uv + x2);
|
||||
mean += *(img12_sum_block + y2 * width_uv + x2 + 4);
|
||||
mean += *(img12_sum_block + (y2 + 4) * width_uv + x2);
|
||||
mean += *(img12_sum_block + (y2 + 4) * width_uv + x2 + 4);
|
||||
|
||||
mean /= 512.0f;
|
||||
}
|
||||
|
||||
weight = mean < 40 ? 0.0f :
|
||||
(mean < 50 ? (mean - 40.0f) / 10.0f : 1.0f);
|
||||
plane_summed_weights += weight;
|
||||
|
||||
plane_quality += weight * vp8_similarity(img1_block, img2_block, img1_sq_block, img2_sq_block, img12_mul_block);
|
||||
}
|
||||
else
|
||||
plane_quality += vp8_similarity(img1_block, img2_block, img1_sq_block, img2_sq_block, img12_mul_block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (plane_summed_weights == 0)
|
||||
return 1.0f;
|
||||
else
|
||||
return plane_quality / plane_summed_weights;
|
||||
}
|
||||
|
||||
double vp8_calc_ssim
|
||||
(
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *dest,
|
||||
int lumamask,
|
||||
double *weight
|
||||
)
|
||||
{
|
||||
double a, b, c;
|
||||
double frame_weight;
|
||||
double ssimv;
|
||||
|
||||
width_y = source->y_width;
|
||||
height_y = source->y_height;
|
||||
height_uv = source->uv_height;
|
||||
width_uv = source->uv_width;
|
||||
stride_uv = dest->uv_stride;
|
||||
stride = dest->y_stride;
|
||||
|
||||
lumimask = lumamask;
|
||||
|
||||
luminance = 1;
|
||||
a = vp8_ssim(source->y_buffer, dest->y_buffer,
|
||||
source->y_stride, dest->y_stride, source->y_width, source->y_height);
|
||||
luminance = 0;
|
||||
|
||||
frame_weight = plane_summed_weights / ((width_y - 7) * (height_y - 7));
|
||||
|
||||
if (frame_weight == 0)
|
||||
a = b = c = 1.0f;
|
||||
else
|
||||
{
|
||||
b = vp8_ssim(source->u_buffer, dest->u_buffer,
|
||||
source->uv_stride, dest->uv_stride, source->uv_width, source->uv_height);
|
||||
|
||||
c = vp8_ssim(source->v_buffer, dest->v_buffer,
|
||||
source->uv_stride, dest->uv_stride, source->uv_width, source->uv_height);
|
||||
}
|
||||
|
||||
ssimv = a * .8 + .1 * (b + c);
|
||||
|
||||
*weight = frame_weight;
|
||||
|
||||
return ssimv;
|
||||
}
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
#define IF_RTCD(x) (x)
|
||||
#else
|
||||
#define IF_RTCD(x) NULL
|
||||
#endif
|
||||
// Google version of SSIM
|
||||
// SSIM
|
||||
#define KERNEL 3
|
||||
@@ -520,3 +235,174 @@ double vp8_calc_ssimg
|
||||
*ssim_v /= uvsize;
|
||||
return ssim_all;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Accumulate the SSIM statistics of one 16x16 window.
 *
 *   s, sp   first sample and row stride of the source window
 *   r, rp   first sample and row stride of the reconstructed window
 *   sum_s, sum_r         += sum of the samples of s / r
 *   sum_sq_s, sum_sq_r   += sum of the squared samples of s / r
 *   sum_sxr              += sum of s[i] * r[i]
 *
 * The outputs are added to, not overwritten, so callers must initialise
 * them.  Totals are gathered in locals and written back once at the end.
 */
void ssim_parms_c
(
    unsigned char *s,
    int sp,
    unsigned char *r,
    int rp,
    unsigned long *sum_s,
    unsigned long *sum_r,
    unsigned long *sum_sq_s,
    unsigned long *sum_sq_r,
    unsigned long *sum_sxr
)
{
    unsigned long acc_s = 0, acc_r = 0;
    unsigned long acc_sq_s = 0, acc_sq_r = 0, acc_sxr = 0;
    int i, j;

    for (i = 0; i < 16; i++)
    {
        const unsigned char *s_row = s + i * sp;
        const unsigned char *r_row = r + i * rp;

        for (j = 0; j < 16; j++)
        {
            const unsigned long sv = s_row[j];
            const unsigned long rv = r_row[j];

            acc_s    += sv;
            acc_r    += rv;
            acc_sq_s += sv * sv;
            acc_sq_r += rv * rv;
            acc_sxr  += sv * rv;
        }
    }

    *sum_s    += acc_s;
    *sum_r    += acc_r;
    *sum_sq_s += acc_sq_s;
    *sum_sq_r += acc_sq_r;
    *sum_sxr  += acc_sxr;
}
|
||||
/*
 * Accumulate the SSIM statistics of one 8x8 window.
 *
 *   s, sp   first sample and row stride of the source window
 *   r, rp   first sample and row stride of the reconstructed window
 *   sum_s, sum_r         += sum of the samples of s / r
 *   sum_sq_s, sum_sq_r   += sum of the squared samples of s / r
 *   sum_sxr              += sum of s[i] * r[i]
 *
 * The outputs are added to, not overwritten, so callers must initialise
 * them.  Totals are gathered in locals and written back once at the end.
 */
void ssim_parms_8x8_c
(
    unsigned char *s,
    int sp,
    unsigned char *r,
    int rp,
    unsigned long *sum_s,
    unsigned long *sum_r,
    unsigned long *sum_sq_s,
    unsigned long *sum_sq_r,
    unsigned long *sum_sxr
)
{
    unsigned long acc_s = 0, acc_r = 0;
    unsigned long acc_sq_s = 0, acc_sq_r = 0, acc_sxr = 0;
    int i, j;

    for (i = 0; i < 8; i++)
    {
        const unsigned char *s_row = s + i * sp;
        const unsigned char *r_row = r + i * rp;

        for (j = 0; j < 8; j++)
        {
            const unsigned long sv = s_row[j];
            const unsigned long rv = r_row[j];

            acc_s    += sv;
            acc_r    += rv;
            acc_sq_s += sv * sv;
            acc_sq_r += rv * rv;
            acc_sxr  += sv * rv;
        }
    }

    *sum_s    += acc_s;
    *sum_r    += acc_r;
    *sum_sq_s += acc_sq_s;
    *sum_sq_r += acc_sq_r;
    *sum_sxr  += acc_sxr;
}
|
||||
|
||||
// SSIM stabilising constants, pre-multiplied by 256^2 so that they can be
// used directly with the raw sums of a 256-sample (16x16) window.
static const long long c1 = 426148;  // 256^2 * (.01*255)^2
static const long long c2 = 3835331; // 256^2 * (.03*255)^2

/*
 * SSIM of one window, computed from raw sums.
 *
 *   sum_s, sum_r         sum of the samples of each image
 *   sum_sq_s, sum_sq_r   sum of the squared samples of each image
 *   sum_sxr              sum of the products of co-located samples
 *   count                number of samples in the window
 *
 * Numerator and denominator are the usual SSIM expression multiplied
 * through by count^2, which keeps everything in integers until the final
 * division.  All operands are widened to long long before multiplying so
 * the products do not wrap where unsigned long is 32 bits wide, and the
 * constants are rescaled from their 256-sample form to the actual window
 * size (exact for count == 256).
 */
static double similarity
(
    unsigned long sum_s,
    unsigned long sum_r,
    unsigned long sum_sq_s,
    unsigned long sum_sq_r,
    unsigned long sum_sxr,
    int count
)
{
    const long long n    = count;
    const long long s    = (long long)sum_s;
    const long long r    = (long long)sum_r;
    const long long sq_s = (long long)sum_sq_s;
    const long long sq_r = (long long)sum_sq_r;
    const long long sxr  = (long long)sum_sxr;

    // c1 / c2 carry a factor of 256^2; swap it for count^2.
    const long long k1 = (c1 * n * n) / (256 * 256);
    const long long k2 = (c2 * n * n) / (256 * 256);

    const long long ssim_n = (2 * s * r + k1) *
                             (2 * n * sxr - 2 * s * r + k2);

    const long long ssim_d = (s * s + r * r + k1) *
                             (n * sq_s - s * s + n * sq_r - r * r + k2);

    return ssim_n * 1.0 / ssim_d;
}
|
||||
|
||||
static double ssim_16x16(unsigned char *s,int sp, unsigned char *r,int rp,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd)
|
||||
{
|
||||
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
|
||||
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
|
||||
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
|
||||
}
|
||||
static double ssim_8x8(unsigned char *s,int sp, unsigned char *r,int rp,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd)
|
||||
{
|
||||
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
|
||||
rtcd->ssimpf_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
|
||||
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
|
||||
}
|
||||
|
||||
// TODO: (jbb) tried to scale this function such that we may be able to use it
|
||||
// for distortion metric in mode selection code ( provided we do a reconstruction)
|
||||
long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd)
|
||||
{
|
||||
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
|
||||
double ssim3;
|
||||
long long ssim_n;
|
||||
long long ssim_d;
|
||||
|
||||
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
|
||||
ssim_n = (2*sum_s*sum_r+ c1)*(2*256*sum_sxr-2*sum_s*sum_r+c2);
|
||||
|
||||
ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
|
||||
(256*sum_sq_s-sum_s*sum_s + 256*sum_sq_r-sum_r*sum_r +c2) ;
|
||||
|
||||
ssim3 = 256 * (ssim_d-ssim_n) / ssim_d;
|
||||
return (long)( 256*ssim3 * ssim3 );
|
||||
}
|
||||
// TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
|
||||
// such that the window regions overlap block boundaries to penalize blocking
|
||||
// artifacts.
|
||||
|
||||
double vp8_ssim2
|
||||
(
|
||||
unsigned char *img1,
|
||||
unsigned char *img2,
|
||||
int stride_img1,
|
||||
int stride_img2,
|
||||
int width,
|
||||
int height,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd
|
||||
)
|
||||
{
|
||||
int i,j;
|
||||
|
||||
double ssim_total=0;
|
||||
|
||||
// we can sample points as frequently as we like start with 1 per 8x8
|
||||
for(i=0; i < height; i+=8, img1 += stride_img1*8, img2 += stride_img2*8)
|
||||
{
|
||||
for(j=0; j < width; j+=8 )
|
||||
{
|
||||
ssim_total += ssim_8x8(img1, stride_img1, img2, stride_img2, rtcd);
|
||||
}
|
||||
}
|
||||
ssim_total /= (width/8 * height /8);
|
||||
return ssim_total;
|
||||
|
||||
}
|
||||
double vp8_calc_ssim
|
||||
(
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *dest,
|
||||
int lumamask,
|
||||
double *weight,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd
|
||||
)
|
||||
{
|
||||
double a, b, c;
|
||||
double ssimv;
|
||||
|
||||
a = vp8_ssim2(source->y_buffer, dest->y_buffer,
|
||||
source->y_stride, dest->y_stride, source->y_width,
|
||||
source->y_height, rtcd);
|
||||
|
||||
b = vp8_ssim2(source->u_buffer, dest->u_buffer,
|
||||
source->uv_stride, dest->uv_stride, source->uv_width,
|
||||
source->uv_height, rtcd);
|
||||
|
||||
c = vp8_ssim2(source->v_buffer, dest->v_buffer,
|
||||
source->uv_stride, dest->uv_stride, source->uv_width,
|
||||
source->uv_height, rtcd);
|
||||
|
||||
ssimv = a * .8 + .1 * (b + c);
|
||||
|
||||
*weight = 1;
|
||||
|
||||
return ssimv;
|
||||
}
|
||||
|
@@ -85,6 +85,19 @@
|
||||
unsigned int *sse \
|
||||
);
|
||||
|
||||
/* Declares `sym` with the signature shared by the SSIM statistics gatherers:
 * source pointer + stride, reference pointer + stride, then five
 * unsigned long outputs (sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr).
 * The expansion already ends in ';', so uses of the macro do not need to
 * add one. */
#define prototype_ssimpf(sym) \
|
||||
void (sym) \
|
||||
( \
|
||||
unsigned char *s, \
|
||||
int sp, \
|
||||
unsigned char *r, \
|
||||
int rp, \
|
||||
unsigned long *sum_s, \
|
||||
unsigned long *sum_r, \
|
||||
unsigned long *sum_sq_s, \
|
||||
unsigned long *sum_sq_r, \
|
||||
unsigned long *sum_sxr \
|
||||
);
|
||||
|
||||
#define prototype_getmbss(sym) unsigned int (sym)(const short *)
|
||||
|
||||
@@ -306,6 +319,15 @@ extern prototype_variance2(vp8_variance_get16x16var);
|
||||
#endif
|
||||
extern prototype_sad(vp8_variance_get4x4sse_cs);
|
||||
|
||||
#ifndef vp8_ssimpf
|
||||
#define vp8_ssimpf ssim_parms_c
|
||||
#endif
|
||||
extern prototype_ssimpf(vp8_ssimpf)
|
||||
|
||||
#ifndef vp8_ssimpf_8x8
|
||||
#define vp8_ssimpf_8x8 ssim_parms_8x8_c
|
||||
#endif
|
||||
extern prototype_ssimpf(vp8_ssimpf_8x8)
|
||||
|
||||
typedef prototype_sad(*vp8_sad_fn_t);
|
||||
typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
|
||||
@@ -315,6 +337,10 @@ typedef prototype_variance(*vp8_variance_fn_t);
|
||||
typedef prototype_variance2(*vp8_variance2_fn_t);
|
||||
typedef prototype_subpixvariance(*vp8_subpixvariance_fn_t);
|
||||
typedef prototype_getmbss(*vp8_getmbss_fn_t);
|
||||
|
||||
typedef prototype_ssimpf(*vp8_ssimpf_fn_t)
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
vp8_sad_fn_t sad4x4;
|
||||
@@ -365,6 +391,11 @@ typedef struct
|
||||
vp8_sad_multi_d_fn_t sad8x8x4d;
|
||||
vp8_sad_multi_d_fn_t sad4x4x4d;
|
||||
|
||||
#if CONFIG_PSNR
|
||||
vp8_ssimpf_fn_t ssimpf_8x8;
|
||||
vp8_ssimpf_fn_t ssimpf;
|
||||
#endif
|
||||
|
||||
} vp8_variance_rtcd_vtable_t;
|
||||
|
||||
typedef struct
|
||||
@@ -378,6 +409,7 @@ typedef struct
|
||||
vp8_sad_multi_fn_t sdx3f;
|
||||
vp8_sad_multi1_fn_t sdx8f;
|
||||
vp8_sad_multi_d_fn_t sdx4df;
|
||||
|
||||
} vp8_variance_fn_ptr_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@@ -186,7 +186,7 @@ sym(vp8_sad16x16x8_sse4):
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@@ -224,7 +224,7 @@ sym(vp8_sad16x8x8_sse4):
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@@ -262,7 +262,7 @@ sym(vp8_sad8x8x8_sse4):
|
||||
PROCESS_8X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@@ -303,7 +303,7 @@ sym(vp8_sad8x16x8_sse4):
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@@ -339,7 +339,7 @@ sym(vp8_sad4x4x8_sse4):
|
||||
PROCESS_4X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
|
215
vp8/encoder/x86/ssim_opt.asm
Normal file
215
vp8/encoder/x86/ssim_opt.asm
Normal file
@@ -0,0 +1,215 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr
; In:  xmm3 = eight source pixels as words, xmm4 = eight reference pixels
;      as words.
; Out: xmm15/xmm14 gain the word sums (unsigned saturating add),
;      xmm13/xmm12/xmm11 gain the pmaddwd pair-products of s*s, r*r, s*r.
; Clobbers xmm1, xmm2 and xmm3.
|
||||
%macro TABULATE_SSIM 0
|
||||
paddusw xmm15, xmm3 ; sum_s
|
||||
paddusw xmm14, xmm4 ; sum_r
|
||||
movdqa xmm1, xmm3
|
||||
pmaddwd xmm1, xmm1
|
||||
paddq xmm13, xmm1 ; sum_sq_s
|
||||
movdqa xmm2, xmm4
|
||||
pmaddwd xmm2, xmm2
|
||||
paddq xmm12, xmm2 ; sum_sq_r
|
||||
pmaddwd xmm3, xmm4
|
||||
paddq xmm11, xmm3 ; sum_sxr
|
||||
%endmacro
|
||||
|
||||
; Sum across the register %1: the four dword lanes are zero-extended to
; qwords, added pairwise, and the two resulting qwords are folded together.
; The total ends up in the low qword of %1 (the high qword is zero).
; Requires xmm0 == 0; clobbers xmm2.
|
||||
%macro SUM_ACROSS_Q 1
|
||||
movdqa xmm2,%1
|
||||
punpckldq %1,xmm0
|
||||
punpckhdq xmm2,xmm0
|
||||
paddq %1,xmm2
|
||||
movdqa xmm2,%1
|
||||
punpcklqdq %1,xmm0
|
||||
punpckhqdq xmm2,xmm0
|
||||
paddq %1,xmm2
|
||||
%endmacro
|
||||
|
||||
; Sum across the register %1 starting with words: the eight word lanes are
; zero-extended to dwords and added, then SUM_ACROSS_Q finishes the
; reduction into the low qword of %1.
; Requires xmm0 == 0; clobbers xmm1 and (through SUM_ACROSS_Q) xmm2.
|
||||
%macro SUM_ACROSS_W 1
|
||||
movdqa xmm1, %1
|
||||
punpcklwd %1,xmm0
|
||||
punpckhwd xmm1,xmm0
|
||||
paddd %1, xmm1
|
||||
SUM_ACROSS_Q %1
|
||||
%endmacro
|
||||
;void vp8_ssim_parms_16x16_sse3(
;    unsigned char *s,
;    int sp,
;    unsigned char *r,
;    int rp,
;    unsigned long *sum_s,
;    unsigned long *sum_r,
;    unsigned long *sum_sq_s,
;    unsigned long *sum_sq_r,
;    unsigned long *sum_sxr);
;
; Walks 16 rows of 16 pixels from s and r and stores the five totals through
; the output pointers. Each total is written with a 64 bit store and replaces
; whatever the destination held.
;
; TODO: Use parm passing through structure, probably don't need the pxors
; ( calling app will initialize to 0 ) could easily fit everything in sse2
; without too much hassle, and can probably do better estimates with psadbw
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
;
; NOTE(review): xmm6 and xmm11-xmm15 are used without being saved; the
; Microsoft x64 convention treats xmm6-xmm15 as callee-saved - confirm
; whether this file is built for that ABI.
global sym(vp8_ssim_parms_16x16_sse3)
sym(vp8_ssim_parms_16x16_sse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 9
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)             ;s
    movsxd      rcx, dword ptr arg(1)   ;sp  (int: sign-extend, upper half of the slot is not guaranteed)
    mov         rdi, arg(2)             ;r
    movsxd      rax, dword ptr arg(3)   ;rp  (int: sign-extend)

    pxor        xmm0, xmm0              ;zero, used for unpacking
    pxor        xmm15,xmm15             ;sum_s
    pxor        xmm14,xmm14             ;sum_r
    pxor        xmm13,xmm13             ;sum_sq_s
    pxor        xmm12,xmm12             ;sum_sq_r
    pxor        xmm11,xmm11             ;sum_sxr

    mov         rdx, 16                 ;row counter
NextRow:

    ;grab source and reference pixels
    movdqu      xmm5, [rsi]
    movdqu      xmm6, [rdi]
    movdqa      xmm3, xmm5
    movdqa      xmm4, xmm6
    punpckhbw   xmm3, xmm0              ; high_s
    punpckhbw   xmm4, xmm0              ; high_r

    TABULATE_SSIM

    movdqa      xmm3, xmm5
    movdqa      xmm4, xmm6
    punpcklbw   xmm3, xmm0              ; low_s
    punpcklbw   xmm4, xmm0              ; low_r

    TABULATE_SSIM

    add         rsi, rcx                ; next s row
    add         rdi, rax                ; next r row

    dec         rdx                     ; counter
    jnz         NextRow

    SUM_ACROSS_W    xmm15
    SUM_ACROSS_W    xmm14
    SUM_ACROSS_Q    xmm13
    SUM_ACROSS_Q    xmm12
    SUM_ACROSS_Q    xmm11

    mov         rdi,arg(4)
    movq        [rdi], xmm15;
    mov         rdi,arg(5)
    movq        [rdi], xmm14;
    mov         rdi,arg(6)
    movq        [rdi], xmm13;
    mov         rdi,arg(7)
    movq        [rdi], xmm12;
    mov         rdi,arg(8)
    movq        [rdi], xmm11;

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
;void vp8_ssim_parms_8x8_sse3(
;    unsigned char *s,
;    int sp,
;    unsigned char *r,
;    int rp,
;    unsigned long *sum_s,
;    unsigned long *sum_r,
;    unsigned long *sum_sq_s,
;    unsigned long *sum_sq_r,
;    unsigned long *sum_sxr);
;
; Walks 8 rows of 8 pixels from s and r and stores the five totals through
; the output pointers. Each total is written with a 64 bit store and replaces
; whatever the destination held.
;
; TODO: Use parm passing through structure, probably don't need the pxors
; ( calling app will initialize to 0 ) could easily fit everything in sse2
; without too much hassle, and can probably do better estimates with psadbw
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 8x8 ssim.
;
; NOTE(review): xmm6 and xmm11-xmm15 are used without being saved; the
; Microsoft x64 convention treats xmm6-xmm15 as callee-saved - confirm
; whether this file is built for that ABI.
global sym(vp8_ssim_parms_8x8_sse3)
sym(vp8_ssim_parms_8x8_sse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 9
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)             ;s
    movsxd      rcx, dword ptr arg(1)   ;sp  (int: sign-extend, upper half of the slot is not guaranteed)
    mov         rdi, arg(2)             ;r
    movsxd      rax, dword ptr arg(3)   ;rp  (int: sign-extend)

    pxor        xmm0, xmm0              ;zero, used for unpacking
    pxor        xmm15,xmm15             ;sum_s
    pxor        xmm14,xmm14             ;sum_r
    pxor        xmm13,xmm13             ;sum_sq_s
    pxor        xmm12,xmm12             ;sum_sq_r
    pxor        xmm11,xmm11             ;sum_sxr

    mov         rdx, 8                  ;row counter
NextRow2:

    ;grab source and reference pixels
    movq        xmm5, [rsi]
    movq        xmm6, [rdi]

    movdqa      xmm3, xmm5
    movdqa      xmm4, xmm6
    punpcklbw   xmm3, xmm0              ; low_s
    punpcklbw   xmm4, xmm0              ; low_r

    TABULATE_SSIM

    add         rsi, rcx                ; next s row
    add         rdi, rax                ; next r row

    dec         rdx                     ; counter
    jnz         NextRow2

    SUM_ACROSS_W    xmm15
    SUM_ACROSS_W    xmm14
    SUM_ACROSS_Q    xmm13
    SUM_ACROSS_Q    xmm12
    SUM_ACROSS_Q    xmm11

    mov         rdi,arg(4)
    movq        [rdi], xmm15;
    mov         rdi,arg(5)
    movq        [rdi], xmm14;
    mov         rdi,arg(6)
    movq        [rdi], xmm13;
    mov         rdi,arg(7)
    movq        [rdi], xmm12;
    mov         rdi,arg(8)
    movq        [rdi], xmm11;

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
|
@@ -176,6 +176,25 @@ void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
|
||||
d->dqcoeff
|
||||
);
|
||||
}
|
||||
#if CONFIG_PSNR
|
||||
#if ARCH_X86_64
|
||||
/* Function type of the SSIM statistics gatherers: source pointer + stride,
 * reference pointer + stride, then five unsigned long outputs
 * (sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr). */
typedef void ssimpf
|
||||
(
|
||||
unsigned char *s,
|
||||
int sp,
|
||||
unsigned char *r,
|
||||
int rp,
|
||||
unsigned long *sum_s,
|
||||
unsigned long *sum_r,
|
||||
unsigned long *sum_sq_s,
|
||||
unsigned long *sum_sq_r,
|
||||
unsigned long *sum_sxr
|
||||
);
|
||||
|
||||
extern ssimpf vp8_ssim_parms_16x16_sse3;
|
||||
extern ssimpf vp8_ssim_parms_8x8_sse3;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -280,6 +299,8 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_sse2;
|
||||
cpi->rtcd.variance.get8x8var = vp8_get8x8var_sse2;
|
||||
cpi->rtcd.variance.get16x16var = vp8_get16x16var_sse2;
|
||||
|
||||
|
||||
/* cpi->rtcd.variance.get4x4sse_cs not implemented for wmt */;
|
||||
|
||||
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_sse2;
|
||||
@@ -339,9 +360,18 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
|
||||
|
||||
#if CONFIG_PSNR
|
||||
#if ARCH_X86_64
|
||||
cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse3;
|
||||
cpi->rtcd.variance.ssimpf = vp8_ssim_parms_16x16_sse3;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if HAVE_SSE4_1
|
||||
if (SSE4_1Enabled)
|
||||
{
|
||||
|
@@ -116,6 +116,7 @@ VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
|
||||
VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt.asm
|
||||
|
||||
ifeq ($(CONFIG_REALTIME_ONLY),yes)
|
||||
VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
|
||||
|
@@ -34,6 +34,7 @@ VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_ar
|
||||
|
||||
#File list for armv6
|
||||
# encoder
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/walsh_v6$(ASM)
|
||||
|
Reference in New Issue
Block a user