Merge remote branch 'internal/upstream' into HEAD
This commit is contained in:
commit
c6ea558c05
3
configure
vendored
3
configure
vendored
@ -40,7 +40,6 @@ Advanced options:
|
||||
${toggle_runtime_cpu_detect} runtime cpu detection
|
||||
${toggle_shared} shared library support
|
||||
${toggle_small} favor smaller size over speed
|
||||
${toggle_arm_asm_detok} assembly version of the detokenizer (ARM platforms only)
|
||||
${toggle_postproc_visualizer} macro block / block level visualizers
|
||||
|
||||
Codecs:
|
||||
@ -260,7 +259,6 @@ CONFIG_LIST="
|
||||
realtime_only
|
||||
shared
|
||||
small
|
||||
arm_asm_detok
|
||||
postproc_visualizer
|
||||
os_support
|
||||
|
||||
@ -304,7 +302,6 @@ CMDLINE_SELECT="
|
||||
realtime_only
|
||||
shared
|
||||
small
|
||||
arm_asm_detok
|
||||
postproc_visualizer
|
||||
|
||||
experimental
|
||||
|
@ -1,320 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_decode_mb_tokens_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
INCLUDE asm_dec_offsets.asm
|
||||
|
||||
l_qcoeff EQU 0
|
||||
l_i EQU 4
|
||||
l_type EQU 8
|
||||
l_stop EQU 12
|
||||
l_c EQU 16
|
||||
l_l_ptr EQU 20
|
||||
l_a_ptr EQU 24
|
||||
l_bc EQU 28
|
||||
l_coef_ptr EQU 32
|
||||
l_stacksize EQU 64
|
||||
|
||||
|
||||
;; constant offsets -- these should be created at build time
|
||||
c_block2above_offset EQU 25
|
||||
c_entropy_nodes EQU 11
|
||||
c_dct_eob_token EQU 11
|
||||
|
||||
|vp8_decode_mb_tokens_v6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
sub sp, sp, #l_stacksize
|
||||
mov r7, r1 ; type
|
||||
mov r9, r0 ; detoken
|
||||
|
||||
ldr r1, [r9, #detok_current_bc]
|
||||
ldr r0, [r9, #detok_qcoeff_start_ptr]
|
||||
mov r11, #0 ; i
|
||||
mov r3, #16 ; stop
|
||||
|
||||
cmp r7, #1 ; type ?= 1
|
||||
addeq r11, r11, #24 ; i = 24
|
||||
addeq r3, r3, #8 ; stop = 24
|
||||
addeq r0, r0, #3, 24 ; qcoefptr += 24*16
|
||||
|
||||
str r0, [sp, #l_qcoeff]
|
||||
str r11, [sp, #l_i]
|
||||
str r7, [sp, #l_type]
|
||||
str r3, [sp, #l_stop]
|
||||
str r1, [sp, #l_bc]
|
||||
|
||||
add lr, r9, r7, lsl #2 ; detoken + type*4
|
||||
|
||||
ldr r8, [r1, #bool_decoder_user_buffer]
|
||||
|
||||
ldr r10, [lr, #detok_coef_probs]
|
||||
ldr r5, [r1, #bool_decoder_count]
|
||||
ldr r6, [r1, #bool_decoder_range]
|
||||
ldr r4, [r1, #bool_decoder_value]
|
||||
|
||||
str r10, [sp, #l_coef_ptr]
|
||||
|
||||
BLOCK_LOOP
|
||||
ldr r3, [r9, #detok_ptr_block2leftabove]
|
||||
ldr r1, [r9, #detok_L]
|
||||
ldr r2, [r9, #detok_A]
|
||||
ldrb r12, [r3, r11]! ; block2left[i]
|
||||
ldrb r3, [r3, #c_block2above_offset]; block2above[i]
|
||||
|
||||
cmp r7, #0 ; c = !type
|
||||
moveq r7, #1
|
||||
movne r7, #0
|
||||
|
||||
ldrb r0, [r1, r12]! ; *(L += block2left[i])
|
||||
ldrb r3, [r2, r3]! ; *(A += block2above[i])
|
||||
mov lr, #c_entropy_nodes ; ENTROPY_NODES = 11
|
||||
|
||||
; VP8_COMBINEENTROPYCONTETEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) !=0)
|
||||
cmp r0, #0 ; *l ?= 0
|
||||
movne r0, #1
|
||||
cmp r3, #0 ; *a ?= 0
|
||||
addne r0, r0, #1 ; t
|
||||
|
||||
str r1, [sp, #l_l_ptr] ; save &l
|
||||
str r2, [sp, #l_a_ptr] ; save &a
|
||||
smlabb r0, r0, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES)
|
||||
mov r1, #0 ; t = 0
|
||||
str r7, [sp, #l_c]
|
||||
|
||||
;align 4
|
||||
COEFF_LOOP
|
||||
ldr r3, [r9, #detok_ptr_coef_bands_x]
|
||||
ldr lr, [r9, #detok_coef_tree_ptr]
|
||||
;STALL
|
||||
ldrb r3, [r3, r7] ; coef_bands_x[c]
|
||||
;STALL
|
||||
;STALL
|
||||
add r0, r0, r3 ; Prob += coef_bands_x[c]
|
||||
|
||||
get_token_loop
|
||||
ldrb r2, [r0, +r1, asr #1] ; Prob[t >> 1]
|
||||
mov r3, r6, lsl #8 ; range << 8
|
||||
sub r3, r3, #256 ; (range << 8) - (1 << 8)
|
||||
mov r10, #1 ; 1
|
||||
|
||||
smlawb r2, r3, r2, r10 ; split = 1 + (((range-1) * probability) >> 8)
|
||||
|
||||
ldrb r12, [r8] ; load cx data byte in stall slot : r8 = bufptr
|
||||
;++
|
||||
|
||||
subs r3, r4, r2, lsl #24 ; value-(split<<24): used later to calculate shift for NORMALIZE
|
||||
addhs r1, r1, #1 ; t += 1
|
||||
movhs r4, r3 ; value -= bigsplit (split << 24)
|
||||
subhs r2, r6, r2 ; range -= split
|
||||
; movlo r6, r2 ; range = split
|
||||
|
||||
ldrsb r1, [lr, r1] ; t = onyx_coef_tree_ptr[t]
|
||||
|
||||
; NORMALIZE
|
||||
clz r3, r2 ; vp8dx_bitreader_norm[range] + 24
|
||||
sub r3, r3, #24 ; vp8dx_bitreader_norm[range]
|
||||
subs r5, r5, r3 ; count -= shift
|
||||
mov r6, r2, lsl r3 ; range <<= shift
|
||||
mov r4, r4, lsl r3 ; value <<= shift
|
||||
|
||||
; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
|
||||
addle r5, r5, #8 ; count += 8
|
||||
rsble r3, r5, #24 ; 24 - count
|
||||
addle r8, r8, #1 ; bufptr++
|
||||
orrle r4, r4, r12, lsl r3 ; value |= *bufptr << shift + 16
|
||||
|
||||
cmp r1, #0 ; t ?= 0
|
||||
bgt get_token_loop ; while (t > 0)
|
||||
|
||||
cmn r1, #c_dct_eob_token ; if(t == -DCT_EOB_TOKEN)
|
||||
beq END_OF_BLOCK ; break
|
||||
|
||||
rsb lr, r1, #0 ; v = -t;
|
||||
|
||||
cmp lr, #4 ; if(v > FOUR_TOKEN)
|
||||
ble SKIP_EXTRABITS
|
||||
|
||||
ldr r3, [r9, #detok_teb_base_ptr]
|
||||
mov r11, #1 ; 1 in split = 1 + ... nope, v+= 1 << bits_count
|
||||
add r7, r3, lr, lsl #4 ; detok_teb_base_ptr + (v << 4)
|
||||
|
||||
ldrsh lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
|
||||
ldrsh r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length
|
||||
|
||||
extrabits_loop
|
||||
add r3, r0, r7 ; &teb_ptr->Probs[bits_count]
|
||||
|
||||
ldrb r2, [r3, #4] ; probability. why +4?
|
||||
mov r3, r6, lsl #8 ; range << 8
|
||||
sub r3, r3, #256 ; range << 8 + 1 << 8
|
||||
|
||||
smlawb r2, r3, r2, r11 ; split = 1 + (((range-1) * probability) >> 8)
|
||||
|
||||
ldrb r12, [r8] ; *bufptr
|
||||
;++
|
||||
|
||||
subs r10, r4, r2, lsl #24 ; value - (split<<24)
|
||||
movhs r4, r10 ; value = value - (split << 24)
|
||||
subhs r2, r6, r2 ; range = range - split
|
||||
addhs lr, lr, r11, lsl r0 ; v += ((UINT16)1<<bits_count)
|
||||
|
||||
; NORMALIZE
|
||||
clz r3, r2 ; shift - leading zeros in split
|
||||
sub r3, r3, #24 ; don't count first 3 bytes
|
||||
subs r5, r5, r3 ; count -= shift
|
||||
mov r6, r2, lsl r3 ; range = range << shift
|
||||
mov r4, r4, lsl r3 ; value <<= shift
|
||||
|
||||
addle r5, r5, #8 ; count += BR_COUNT
|
||||
addle r8, r8, #1 ; bufptr++
|
||||
rsble r3, r5, #24 ; BR_COUNT - count
|
||||
orrle r4, r4, r12, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
|
||||
|
||||
subs r0, r0, #1 ; bits_count --
|
||||
bpl extrabits_loop
|
||||
|
||||
|
||||
SKIP_EXTRABITS
|
||||
ldr r11, [sp, #l_qcoeff]
|
||||
ldr r0, [sp, #l_coef_ptr] ; Prob = coef_probs
|
||||
|
||||
cmp r1, #0 ; check for nonzero token - if (t)
|
||||
beq SKIP_EOB_CHECK ; if t is zero, we will skip the eob table chec
|
||||
|
||||
add r3, r6, #1 ; range + 1
|
||||
mov r2, r3, lsr #1 ; split = (range + 1) >> 1
|
||||
|
||||
subs r3, r4, r2, lsl #24 ; value - (split<<24)
|
||||
movhs r4, r3 ; value -= (split << 24)
|
||||
subhs r2, r6, r2 ; range -= split
|
||||
mvnhs r3, lr ; -v
|
||||
addhs lr, r3, #1 ; v = (v ^ -1) + 1
|
||||
|
||||
; NORMALIZE
|
||||
clz r3, r2 ; leading 0s in split
|
||||
sub r3, r3, #24 ; shift
|
||||
subs r5, r5, r3 ; count -= shift
|
||||
mov r6, r2, lsl r3 ; range <<= shift
|
||||
mov r4, r4, lsl r3 ; value <<= shift
|
||||
ldrleb r2, [r8], #1 ; *(bufptr++)
|
||||
addle r5, r5, #8 ; count += 8
|
||||
rsble r3, r5, #24 ; BR_COUNT - count
|
||||
orrle r4, r4, r2, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
|
||||
|
||||
add r0, r0, #11 ; Prob += ENTROPY_NODES (11)
|
||||
|
||||
cmn r1, #1 ; t < -ONE_TOKEN
|
||||
|
||||
addlt r0, r0, #11 ; Prob += ENTROPY_NODES (11)
|
||||
|
||||
mvn r1, #1 ; t = -1 ???? C is -2
|
||||
|
||||
SKIP_EOB_CHECK
|
||||
ldr r7, [sp, #l_c] ; c
|
||||
ldr r3, [r9, #detok_scan]
|
||||
add r1, r1, #2 ; t+= 2
|
||||
cmp r7, #15 ; c should will be one higher
|
||||
|
||||
ldr r3, [r3, +r7, lsl #2] ; scan[c] this needs pre-inc c value
|
||||
add r7, r7, #1 ; c++
|
||||
add r3, r11, r3, lsl #1 ; qcoeff + scan[c]
|
||||
|
||||
str r7, [sp, #l_c] ; store c
|
||||
strh lr, [r3] ; qcoef_ptr[scan[c]] = v
|
||||
|
||||
blt COEFF_LOOP
|
||||
|
||||
sub r7, r7, #1 ; if(t != -DCT_EOB_TOKEN) --c
|
||||
|
||||
END_OF_BLOCK
|
||||
ldr r3, [sp, #l_type] ; type
|
||||
ldr r10, [sp, #l_coef_ptr] ; coef_ptr
|
||||
ldr r0, [sp, #l_qcoeff] ; qcoeff
|
||||
ldr r11, [sp, #l_i] ; i
|
||||
ldr r12, [sp, #l_stop] ; stop
|
||||
|
||||
cmp r3, #0 ; type ?= 0
|
||||
moveq r1, #1
|
||||
movne r1, #0
|
||||
add r3, r11, r9 ; detok + i
|
||||
|
||||
cmp r7, r1 ; c ?= !type
|
||||
strb r7, [r3, #detok_eob] ; eob[i] = c
|
||||
|
||||
ldr r7, [sp, #l_l_ptr] ; l
|
||||
ldr r2, [sp, #l_a_ptr] ; a
|
||||
movne r3, #1 ; t
|
||||
moveq r3, #0
|
||||
|
||||
add r0, r0, #32 ; qcoeff += 32 (16 * 2?)
|
||||
add r11, r11, #1 ; i++
|
||||
strb r3, [r7] ; *l = t
|
||||
strb r3, [r2] ; *a = t
|
||||
str r0, [sp, #l_qcoeff] ; qcoeff
|
||||
str r11, [sp, #l_i] ; i
|
||||
|
||||
cmp r11, r12 ; i < stop
|
||||
ldr r7, [sp, #l_type] ; type
|
||||
|
||||
blt BLOCK_LOOP
|
||||
|
||||
cmp r11, #25 ; i ?= 25
|
||||
bne ln2_decode_mb_to
|
||||
|
||||
ldr r12, [r9, #detok_qcoeff_start_ptr]
|
||||
ldr r10, [r9, #detok_coef_probs]
|
||||
mov r7, #0 ; type/i = 0
|
||||
mov r3, #16 ; stop = 16
|
||||
str r12, [sp, #l_qcoeff] ; qcoeff_ptr = qcoeff_start_ptr
|
||||
str r7, [sp, #l_i]
|
||||
str r7, [sp, #l_type]
|
||||
str r3, [sp, #l_stop]
|
||||
|
||||
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type=0]
|
||||
|
||||
b BLOCK_LOOP
|
||||
|
||||
ln2_decode_mb_to
|
||||
cmp r11, #16 ; i ?= 16
|
||||
bne ln1_decode_mb_to
|
||||
|
||||
mov r10, #detok_coef_probs
|
||||
add r10, r10, #2*4 ; coef_probs[type]
|
||||
ldr r10, [r9, r10] ; detok + detok_coef_probs[type]
|
||||
|
||||
mov r7, #2 ; type = 2
|
||||
mov r3, #24 ; stop = 24
|
||||
|
||||
str r7, [sp, #l_type]
|
||||
str r3, [sp, #l_stop]
|
||||
|
||||
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type]
|
||||
b BLOCK_LOOP
|
||||
|
||||
ln1_decode_mb_to
|
||||
ldr r2, [sp, #l_bc]
|
||||
mov r0, #0
|
||||
nop
|
||||
|
||||
str r8, [r2, #bool_decoder_user_buffer]
|
||||
str r5, [r2, #bool_decoder_count]
|
||||
str r4, [r2, #bool_decoder_value]
|
||||
str r6, [r2, #bool_decoder_range]
|
||||
|
||||
add sp, sp, #l_stacksize
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP ; |vp8_decode_mb_tokens_v6|
|
||||
|
||||
END
|
@ -1,22 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef DETOKENIZE_ARM_H
|
||||
#define DETOKENIZE_ARM_H
|
||||
|
||||
#if HAVE_ARMV6
|
||||
#if CONFIG_ARM_ASM_DETOK
|
||||
void vp8_init_detokenizer(VP8D_COMP *dx);
|
||||
void vp8_decode_mb_tokens_v6(DETOK *detoken, int type);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
@ -74,37 +74,6 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_ARM_ASM_DETOK
|
||||
/* mashup of vp8_block2left and vp8_block2above so we only need one pointer
|
||||
* for the assembly version.
|
||||
*/
|
||||
DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) =
|
||||
{
|
||||
/* vp8_block2left */
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
|
||||
/* vp8_block2above */
|
||||
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
|
||||
};
|
||||
|
||||
void vp8_init_detokenizer(VP8D_COMP *dx)
|
||||
{
|
||||
const VP8_COMMON *const oc = & dx->common;
|
||||
MACROBLOCKD *x = & dx->mb;
|
||||
|
||||
dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree;
|
||||
dx->detoken.ptr_block2leftabove = vp8_block2leftabove;
|
||||
dx->detoken.ptr_coef_bands_x = vp8_coef_bands_x;
|
||||
dx->detoken.scan = vp8_default_zig_zag1d;
|
||||
dx->detoken.teb_base_ptr = vp8d_token_extra_bits2;
|
||||
dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
|
||||
|
||||
dx->detoken.coef_probs[0] = (oc->fc.coef_probs [0] [ 0 ] [0]);
|
||||
dx->detoken.coef_probs[1] = (oc->fc.coef_probs [1] [ 0 ] [0]);
|
||||
dx->detoken.coef_probs[2] = (oc->fc.coef_probs [2] [ 0 ] [0]);
|
||||
dx->detoken.coef_probs[3] = (oc->fc.coef_probs [3] [ 0 ] [0]);
|
||||
}
|
||||
#endif
|
||||
|
||||
DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
|
||||
#define FILL \
|
||||
if(count < 0) \
|
||||
@ -202,35 +171,6 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
|
||||
}\
|
||||
NORMALIZE
|
||||
|
||||
#if CONFIG_ARM_ASM_DETOK
|
||||
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
|
||||
{
|
||||
int eobtotal = 0;
|
||||
int i, type;
|
||||
|
||||
dx->detoken.current_bc = x->current_bc;
|
||||
dx->detoken.A = x->above_context;
|
||||
dx->detoken.L = x->left_context;
|
||||
|
||||
type = 3;
|
||||
|
||||
if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
type = 1;
|
||||
eobtotal -= 16;
|
||||
}
|
||||
|
||||
vp8_decode_mb_tokens_v6(&dx->detoken, type);
|
||||
|
||||
for (i = 0; i < 25; i++)
|
||||
{
|
||||
x->eobs[i] = dx->detoken.eob[i];
|
||||
eobtotal += dx->detoken.eob[i];
|
||||
}
|
||||
|
||||
return eobtotal;
|
||||
}
|
||||
#else
|
||||
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
|
||||
{
|
||||
ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
|
||||
@ -423,4 +363,3 @@ BLOCK_FINISHED:
|
||||
return eobtotal;
|
||||
|
||||
}
|
||||
#endif /*!CONFIG_ASM_DETOK*/
|
||||
|
@ -14,10 +14,6 @@
|
||||
|
||||
#include "onyxd_int.h"
|
||||
|
||||
#if ARCH_ARM
|
||||
#include "arm/detokenize_arm.h"
|
||||
#endif
|
||||
|
||||
void vp8_reset_mb_tokens_context(MACROBLOCKD *x);
|
||||
int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
|
||||
|
||||
|
@ -132,9 +132,6 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
}
|
||||
|
||||
#if CONFIG_ARM_ASM_DETOK
|
||||
vp8_init_detokenizer(pbi);
|
||||
#endif
|
||||
pbi->common.error.setjmp = 0;
|
||||
return (VP8D_PTR) pbi;
|
||||
}
|
||||
|
@ -58,21 +58,6 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BLOCK
|
||||
RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
|
||||
}
|
||||
|
||||
void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode)
|
||||
{
|
||||
vp8_predict_intra4x4(b, best_mode, b->predictor);
|
||||
|
||||
ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
|
||||
|
||||
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
|
||||
|
||||
x->quantize_b(be, b);
|
||||
|
||||
IDCT_INVOKE(&rtcd->common->idct, idct16)(b->dqcoeff, b->diff, 32);
|
||||
|
||||
RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
|
||||
}
|
||||
|
||||
void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb)
|
||||
{
|
||||
int i;
|
||||
@ -168,17 +153,3 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
|
||||
}
|
||||
|
||||
void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
{
|
||||
vp8_build_intra_predictors_mbuv(&x->e_mbd);
|
||||
|
||||
ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
|
||||
|
||||
vp8_transform_mbuv(x);
|
||||
|
||||
vp8_quantize_mbuv(x);
|
||||
|
||||
vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
|
||||
|
||||
vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
|
||||
}
|
||||
|
@ -19,6 +19,5 @@ void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *, MACROBLOCK *mb);
|
||||
void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);
|
||||
void vp8_update_mode_context(int *abmode, int *lbmode, int i, int best_mode);
|
||||
void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);
|
||||
void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
|
||||
|
||||
#endif
|
||||
|
@ -331,7 +331,8 @@ static void setup_features(VP8_COMP *cpi)
|
||||
|
||||
void vp8_dealloc_compressor_data(VP8_COMP *cpi)
|
||||
{
|
||||
vpx_free(cpi->tplist);
|
||||
if(cpi->tplist!=0)
|
||||
vpx_free(cpi->tplist);
|
||||
cpi->tplist = NULL;
|
||||
|
||||
// Delete last frame MV storage buffers
|
||||
@ -1472,6 +1473,9 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
|
||||
cpi->mt_sync_range = 16;
|
||||
#endif
|
||||
|
||||
/* Release any token list left over from an earlier allocation before the
 * new one is allocated just below.  The previous form had a stray ';'
 * directly after the condition, which turned the if into an empty
 * statement and left the vpx_free() call unconditional. */
if (cpi->tplist)
    vpx_free(cpi->tplist);
|
||||
|
||||
CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));
|
||||
}
|
||||
|
||||
|
@ -645,14 +645,41 @@ static void macro_block_yrd( MACROBLOCK *mb,
|
||||
*Rate = vp8_rdcost_mby(mb);
|
||||
}
|
||||
|
||||
/* Copy a 4x4 block of bytes out of `predictor` into `dst`.
 *
 * predictor: start of the 4x4 block; consecutive rows are 16 bytes apart.
 * dst:       receives the 16 bytes contiguously, row after row.
 *
 * Only the 4 bytes at the start of each of the 4 rows are read; `predictor`
 * itself is not modified.
 */
static void save_predictor(unsigned char *predictor, unsigned char *dst)
{
    int r, c;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            /* row r of the source starts 16 * r bytes in; the packed
             * copy stores it at offset 4 * r */
            dst[r * 4 + c] = predictor[r * 16 + c];
        }
    }
}
|
||||
/* Copy 16 packed bytes back into a 4x4 block inside `predictor`.
 *
 * predictor: start of the 4x4 block to overwrite; consecutive rows are
 *            16 bytes apart.
 * dst:       despite its name this is the SOURCE here: 16 contiguous bytes,
 *            row after row, in the layout save_predictor() writes.
 *
 * Only the 4 bytes at the start of each of the 4 rows of `predictor` are
 * written; the remaining 12 bytes of every row are left untouched.
 */
static void restore_predictor(unsigned char *predictor, unsigned char *dst)
{
    int r, c;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            /* packed row r lives at offset 4 * r; the block row it
             * belongs to starts 16 * r bytes in */
            predictor[r * 16 + c] = dst[r * 4 + c];
        }
    }
}
|
||||
static int rd_pick_intra4x4block(
|
||||
VP8_COMP *cpi,
|
||||
MACROBLOCK *x,
|
||||
BLOCK *be,
|
||||
BLOCKD *b,
|
||||
B_PREDICTION_MODE *best_mode,
|
||||
B_PREDICTION_MODE above,
|
||||
B_PREDICTION_MODE left,
|
||||
unsigned int *bmode_costs,
|
||||
ENTROPY_CONTEXT *a,
|
||||
ENTROPY_CONTEXT *l,
|
||||
|
||||
@ -661,31 +688,27 @@ static int rd_pick_intra4x4block(
|
||||
int *bestdistortion)
|
||||
{
|
||||
B_PREDICTION_MODE mode;
|
||||
int best_rd = INT_MAX; // 1<<30
|
||||
int best_rd = INT_MAX;
|
||||
int rate = 0;
|
||||
int distortion;
|
||||
unsigned int *mode_costs;
|
||||
|
||||
ENTROPY_CONTEXT ta = *a, tempa = *a;
|
||||
ENTROPY_CONTEXT tl = *l, templ = *l;
|
||||
|
||||
|
||||
if (x->e_mbd.frame_type == KEY_FRAME)
|
||||
{
|
||||
mode_costs = x->bmode_costs[above][left];
|
||||
}
|
||||
else
|
||||
{
|
||||
mode_costs = x->inter_bmode_costs;
|
||||
}
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16);
|
||||
DECLARE_ALIGNED_ARRAY(16, short, dqcoeff, 16);
|
||||
|
||||
for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
|
||||
{
|
||||
int this_rd;
|
||||
int ratey;
|
||||
|
||||
rate = mode_costs[mode];
|
||||
vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, mode);
|
||||
rate = bmode_costs[mode];
|
||||
|
||||
vp8_predict_intra4x4(b, mode, b->predictor);
|
||||
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
|
||||
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b(be, b);
|
||||
|
||||
tempa = ta;
|
||||
templ = tl;
|
||||
@ -705,17 +728,23 @@ static int rd_pick_intra4x4block(
|
||||
*best_mode = mode;
|
||||
*a = tempa;
|
||||
*l = templ;
|
||||
save_predictor(b->predictor, predictor);
|
||||
vpx_memcpy(dqcoeff, b->dqcoeff, 32);
|
||||
}
|
||||
}
|
||||
|
||||
b->bmi.mode = (B_PREDICTION_MODE)(*best_mode);
|
||||
vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, b->bmi.mode);
|
||||
|
||||
restore_predictor(b->predictor, predictor);
|
||||
vpx_memcpy(b->dqcoeff, dqcoeff, 32);
|
||||
|
||||
IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(b->dqcoeff, b->diff, 32);
|
||||
RECON_INVOKE(IF_RTCD(&cpi->rtcd.common->recon), recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
|
||||
|
||||
return best_rd;
|
||||
|
||||
}
|
||||
|
||||
|
||||
int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
|
||||
int *rate_y, int *Distortion, int best_rd)
|
||||
{
|
||||
@ -728,6 +757,7 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
|
||||
ENTROPY_CONTEXT_PLANES t_above, t_left;
|
||||
ENTROPY_CONTEXT *ta;
|
||||
ENTROPY_CONTEXT *tl;
|
||||
unsigned int *bmode_costs;
|
||||
|
||||
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
@ -737,17 +767,25 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
|
||||
|
||||
vp8_intra_prediction_down_copy(xd);
|
||||
|
||||
bmode_costs = mb->inter_bmode_costs;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
MODE_INFO *const mic = xd->mode_info_context;
|
||||
const int mis = xd->mode_info_stride;
|
||||
const B_PREDICTION_MODE A = vp8_above_bmi(mic, i, mis)->mode;
|
||||
const B_PREDICTION_MODE L = vp8_left_bmi(mic, i)->mode;
|
||||
B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
|
||||
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
|
||||
|
||||
if (mb->e_mbd.frame_type == KEY_FRAME)
|
||||
{
|
||||
const B_PREDICTION_MODE A = vp8_above_bmi(mic, i, mis)->mode;
|
||||
const B_PREDICTION_MODE L = vp8_left_bmi(mic, i)->mode;
|
||||
|
||||
bmode_costs = mb->bmode_costs[A][L];
|
||||
}
|
||||
|
||||
total_rd += rd_pick_intra4x4block(
|
||||
cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L,
|
||||
cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
|
||||
ta + vp8_block2above[i],
|
||||
tl + vp8_block2left[i], &r, &ry, &d);
|
||||
|
||||
@ -769,7 +807,6 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
|
||||
|
||||
return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
|
||||
}
|
||||
|
||||
int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
|
||||
MACROBLOCK *x,
|
||||
int *Rate,
|
||||
@ -837,15 +874,6 @@ static int rd_cost_mbuv(MACROBLOCK *mb)
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_get_mbuvrecon_error(const vp8_variance_rtcd_vtable_t *rtcd, const MACROBLOCK *x) // sum of squares
|
||||
{
|
||||
unsigned int sse0, sse1;
|
||||
int sum0, sum1;
|
||||
VARIANCE_INVOKE(rtcd, get8x8var)(x->src.u_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer, x->e_mbd.dst.uv_stride, &sse0, &sum0);
|
||||
VARIANCE_INVOKE(rtcd, get8x8var)(x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride, &sse1, &sum1);
|
||||
return (sse0 + sse1);
|
||||
}
|
||||
|
||||
static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel)
|
||||
{
|
||||
vp8_build_uvmvs(&x->e_mbd, fullpixel);
|
||||
@ -873,7 +901,12 @@ int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *ra
|
||||
int this_rd;
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
|
||||
vp8_encode_intra16x16mbuvrd(IF_RTCD(&cpi->rtcd), x);
|
||||
vp8_build_intra_predictors_mbuv(&x->e_mbd);
|
||||
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
|
||||
x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor,
|
||||
x->src.uv_stride);
|
||||
vp8_transform_mbuv(x);
|
||||
vp8_quantize_mbuv(x);
|
||||
|
||||
rate_to = rd_cost_mbuv(x);
|
||||
rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode];
|
||||
|
@ -586,52 +586,45 @@ sym(vp8_sad16x16_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
lea end_ptr, [src_ptr+src_stride*8]
|
||||
|
||||
lea end_ptr, [end_ptr+src_stride*8]
|
||||
pxor mm7, mm7
|
||||
mov end_ptr, 4
|
||||
pxor xmm7, xmm7
|
||||
|
||||
.vp8_sad16x16_sse3_loop:
|
||||
|
||||
movq ret_var, mm7
|
||||
cmp ret_var, max_err
|
||||
jg .vp8_sad16x16_early_exit
|
||||
|
||||
movq mm0, QWORD PTR [src_ptr]
|
||||
movq mm2, QWORD PTR [src_ptr+8]
|
||||
|
||||
movq mm1, QWORD PTR [ref_ptr]
|
||||
movq mm3, QWORD PTR [ref_ptr+8]
|
||||
|
||||
movq mm4, QWORD PTR [src_ptr+src_stride]
|
||||
movq mm5, QWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
psadbw mm0, mm1
|
||||
psadbw mm2, mm3
|
||||
|
||||
movq mm1, QWORD PTR [src_ptr+src_stride+8]
|
||||
movq mm3, QWORD PTR [ref_ptr+ref_stride+8]
|
||||
|
||||
psadbw mm4, mm5
|
||||
psadbw mm1, mm3
|
||||
movdqa xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [ref_ptr]
|
||||
movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
|
||||
movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea ref_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
paddw mm0, mm2
|
||||
paddw mm4, mm1
|
||||
movdqa xmm4, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm5, XMMWORD PTR [ref_ptr]
|
||||
movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
|
||||
|
||||
paddw mm7, mm0
|
||||
paddw mm7, mm4
|
||||
psadbw xmm0, xmm1
|
||||
|
||||
cmp src_ptr, end_ptr
|
||||
movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
psadbw xmm2, xmm3
|
||||
psadbw xmm4, xmm5
|
||||
psadbw xmm6, xmm1
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea ref_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
paddw xmm7, xmm0
|
||||
paddw xmm7, xmm2
|
||||
paddw xmm7, xmm4
|
||||
paddw xmm7, xmm6
|
||||
|
||||
sub end_ptr, 1
|
||||
jne .vp8_sad16x16_sse3_loop
|
||||
|
||||
movq ret_var, mm7
|
||||
|
||||
.vp8_sad16x16_early_exit:
|
||||
|
||||
mov rax, ret_var
|
||||
movq xmm0, xmm7
|
||||
psrldq xmm7, 8
|
||||
paddw xmm0, xmm7
|
||||
movq rax, xmm0
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
|
@ -15,7 +15,6 @@ VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c
|
||||
VP8_CX_SRCS-$(ARCH_ARM) += decoder/asm_dec_offsets.c
|
||||
|
||||
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dequantize_arm.c
|
||||
VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK) += decoder/arm/detokenize$(ASM)
|
||||
|
||||
#File list for armv6
|
||||
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_dc_idct_v6$(ASM)
|
||||
|
Loading…
x
Reference in New Issue
Block a user