Changed above and left context data layout

The main reason for the change was to reduce cycles in the token
decoder (~1.5% gain for 32 bit). This layout should be more
cache friendly.

As a result of this change, the encoder had to be updated.

Change-Id: Id5e804169d8889da0378b3a519ac04dabd28c837
Note: dixie uses a similar layout
This commit is contained in:
Scott LaVarnway 2010-08-31 10:49:57 -04:00
parent aaad6d1b54
commit e85e631504
15 changed files with 216 additions and 359 deletions

View File

@ -42,16 +42,10 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame); vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
vpx_free(oci->above_context[Y1CONTEXT]); vpx_free(oci->above_context);
vpx_free(oci->above_context[UCONTEXT]);
vpx_free(oci->above_context[VCONTEXT]);
vpx_free(oci->above_context[Y2CONTEXT]);
vpx_free(oci->mip); vpx_free(oci->mip);
oci->above_context[Y1CONTEXT] = 0; oci->above_context = 0;
oci->above_context[UCONTEXT] = 0;
oci->above_context[VCONTEXT] = 0;
oci->above_context[Y2CONTEXT] = 0;
oci->mip = 0; oci->mip = 0;
} }
@ -118,33 +112,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
oci->mi = oci->mip + oci->mode_info_stride + 1; oci->mi = oci->mip + oci->mode_info_stride + 1;
oci->above_context[Y1CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 4 , 1); oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
if (!oci->above_context[Y1CONTEXT]) if (!oci->above_context)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
oci->above_context[UCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
if (!oci->above_context[UCONTEXT])
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
oci->above_context[VCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
if (!oci->above_context[VCONTEXT])
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
oci->above_context[Y2CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols , 1);
if (!oci->above_context[Y2CONTEXT])
{ {
vp8_de_alloc_frame_buffers(oci); vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE; return ALLOC_FAILURE;

View File

@ -12,13 +12,13 @@
#include "blockd.h" #include "blockd.h"
#include "vpx_mem/vpx_mem.h" #include "vpx_mem/vpx_mem.h"
void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count)
{
vpx_memcpy(t->l, l, sizeof(ENTROPY_CONTEXT) * count);
vpx_memcpy(t->a, a, sizeof(ENTROPY_CONTEXT) * count);
}
const int vp8_block2left[25] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0};
const int vp8_block2above[25] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0};
const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1}; const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1};
const int vp8_block2context[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3};
/*
 * Per-block offset into the left ENTROPY_CONTEXT_PLANES when that struct is
 * addressed as a flat ENTROPY_CONTEXT array (the token decoder does
 * `l = L + vp8_block2left[i]`): offsets 0-3 fall in y1[], 4-5 in u[],
 * 6-7 in v[], and 8 is y2.
 * Blocks 0-15 change offset once per run of four consecutive blocks,
 * blocks 16-19 and 20-23 once per run of two, and block 24 maps to y2.
 */
const unsigned char vp8_block2left[25] =
{
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
};
/*
 * Per-block offset into the above ENTROPY_CONTEXT_PLANES when that struct is
 * addressed as a flat ENTROPY_CONTEXT array (the token decoder does
 * `a = A + vp8_block2above[i]`): offsets 0-3 fall in y1[], 4-5 in u[],
 * 6-7 in v[], and 8 is y2.
 * Blocks 0-15 cycle through offsets 0..3, blocks 16-19 alternate 4/5,
 * blocks 20-23 alternate 6/7, and block 24 maps to y2.
 */
const unsigned char vp8_block2above[25] =
{
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
};

View File

@ -49,19 +49,19 @@ typedef struct
} POS; } POS;
typedef int ENTROPY_CONTEXT; typedef char ENTROPY_CONTEXT;
typedef struct typedef struct
{ {
ENTROPY_CONTEXT l[4]; ENTROPY_CONTEXT y1[4];
ENTROPY_CONTEXT a[4]; ENTROPY_CONTEXT u[2];
} TEMP_CONTEXT; ENTROPY_CONTEXT v[2];
ENTROPY_CONTEXT y2;
} ENTROPY_CONTEXT_PLANES;
extern void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count);
extern const int vp8_block2left[25];
extern const int vp8_block2above[25];
extern const int vp8_block2type[25]; extern const int vp8_block2type[25];
extern const int vp8_block2context[25];
extern const unsigned char vp8_block2left[25];
extern const unsigned char vp8_block2above[25];
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \ #define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
Dest = ((A)!=0) + ((B)!=0); Dest = ((A)!=0) + ((B)!=0);
@ -237,8 +237,8 @@ typedef struct
int left_available; int left_available;
// Y,U,V,Y2 // Y,U,V,Y2
ENTROPY_CONTEXT *above_context[4]; // row of context for each plane ENTROPY_CONTEXT_PLANES *above_context;
ENTROPY_CONTEXT(*left_context)[4]; // (up to) 4 contexts "" ENTROPY_CONTEXT_PLANES *left_context;
// 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. // 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
unsigned char segmentation_enabled; unsigned char segmentation_enabled;

View File

@ -165,8 +165,8 @@ typedef struct VP8Common
int ref_frame_sign_bias[MAX_REF_FRAMES]; // Two state 0, 1 int ref_frame_sign_bias[MAX_REF_FRAMES]; // Two state 0, 1
// Y,U,V,Y2 // Y,U,V,Y2
ENTROPY_CONTEXT *above_context[4]; // row of context for each plane ENTROPY_CONTEXT_PLANES *above_context; // row of context for each plane
ENTROPY_CONTEXT left_context[4][4]; // (up to) 4 contexts "" ENTROPY_CONTEXT_PLANES left_context; // (up to) 4 contexts ""
// keyframe block modes are predicted by their above, left neighbors // keyframe block modes are predicted by their above, left neighbors

View File

@ -13,7 +13,7 @@
#include "vpx_ports/mem.h" #include "vpx_ports/mem.h"
#include "vpx_mem/vpx_mem.h" #include "vpx_mem/vpx_mem.h"
DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) = DECLARE_ALIGNED(16, const unsigned char, vp8dx_bitreader_norm[256]) =
{ {
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

View File

@ -95,7 +95,7 @@ typedef struct vp8_dboolhuff_rtcd_vtable {
#define IF_RTCD(x) NULL #define IF_RTCD(x) NULL
//#endif //#endif
DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
/* wrapper functions to hide RTCD. static means inline means hopefully no /* wrapper functions to hide RTCD. static means inline means hopefully no
* penalty * penalty

View File

@ -338,15 +338,12 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
vpx_memset(pc->left_context, 0, sizeof(pc->left_context)); vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
recon_yoffset = mb_row * recon_y_stride * 16; recon_yoffset = mb_row * recon_y_stride * 16;
recon_uvoffset = mb_row * recon_uv_stride * 8; recon_uvoffset = mb_row * recon_uv_stride * 8;
// reset above block coeffs // reset above block coeffs
xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT]; xd->above_context = pc->above_context;
xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
xd->up_available = (mb_row != 0); xd->up_available = (mb_row != 0);
xd->mb_to_top_edge = -((mb_row * 16)) << 3; xd->mb_to_top_edge = -((mb_row * 16)) << 3;
@ -403,10 +400,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
++xd->mode_info_context; /* next mb */ ++xd->mode_info_context; /* next mb */
xd->above_context[Y1CONTEXT] += 4; xd->above_context++;
xd->above_context[UCONTEXT ] += 2;
xd->above_context[VCONTEXT ] += 2;
xd->above_context[Y2CONTEXT] ++;
pbi->current_mb_col_main = mb_col; pbi->current_mb_col_main = mb_col;
} }
@ -561,7 +555,7 @@ static void init_frame(VP8D_COMP *pbi)
} }
} }
xd->left_context = pc->left_context; xd->left_context = &pc->left_context;
xd->mode_info_context = pc->mi; xd->mode_info_context = pc->mi;
xd->frame_type = pc->frame_type; xd->frame_type = pc->frame_type;
xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_context->mbmi.mode = DC_PRED;
@ -849,11 +843,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
else else
vp8_decode_mode_mvs(pbi); vp8_decode_mode_mvs(pbi);
// reset since these guys are used as iterators vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
vpx_memset(pc->above_context[Y1CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 4);
vpx_memset(pc->above_context[UCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
vpx_memset(pc->above_context[VCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
vpx_memset(pc->above_context[Y2CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols);
vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO)); vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO));

View File

@ -19,7 +19,7 @@
#define BOOL_DATA UINT8 #define BOOL_DATA UINT8
#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES #define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
DECLARE_ALIGNED(16, UINT16, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X}; DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
#define EOB_CONTEXT_NODE 0 #define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1 #define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2 #define ONE_CONTEXT_NODE 2
@ -61,47 +61,16 @@ DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTR
void vp8_reset_mb_tokens_context(MACROBLOCKD *x) void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
{ {
ENTROPY_CONTEXT **const A = x->above_context;
ENTROPY_CONTEXT(* const L)[4] = x->left_context;
ENTROPY_CONTEXT *a;
ENTROPY_CONTEXT *l;
/* Clear entropy contexts for Y blocks */
a = A[Y1CONTEXT];
l = L[Y1CONTEXT];
*a = 0;
*(a+1) = 0;
*(a+2) = 0;
*(a+3) = 0;
*l = 0;
*(l+1) = 0;
*(l+2) = 0;
*(l+3) = 0;
/* Clear entropy contexts for U blocks */
a = A[UCONTEXT];
l = L[UCONTEXT];
*a = 0;
*(a+1) = 0;
*l = 0;
*(l+1) = 0;
/* Clear entropy contexts for V blocks */
a = A[VCONTEXT];
l = L[VCONTEXT];
*a = 0;
*(a+1) = 0;
*l = 0;
*(l+1) = 0;
/* Clear entropy contexts for Y2 blocks */ /* Clear entropy contexts for Y2 blocks */
if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
{ {
a = A[Y2CONTEXT]; vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
l = L[Y2CONTEXT]; vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
*a = 0; }
*l = 0; else
{
vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
} }
} }
@ -132,7 +101,7 @@ void vp8_init_detokenizer(VP8D_COMP *dx)
} }
#endif #endif
DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
#define FILL \ #define FILL \
if(count < 0) \ if(count < 0) \
VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend); VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
@ -260,8 +229,8 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
#else #else
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
{ {
ENTROPY_CONTEXT **const A = x->above_context; ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
ENTROPY_CONTEXT(* const L)[4] = x->left_context; ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
const VP8_COMMON *const oc = & dx->common; const VP8_COMMON *const oc = & dx->common;
BOOL_DECODER *bc = x->current_bc; BOOL_DECODER *bc = x->current_bc;
@ -291,29 +260,24 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
int stop; int stop;
INT16 val, bits_count; INT16 val, bits_count;
INT16 c; INT16 c;
INT16 t;
INT16 v; INT16 v;
const vp8_prob *Prob; const vp8_prob *Prob;
//int *scan;
type = 3; type = 3;
i = 0; i = 0;
stop = 16; stop = 16;
scan = vp8_default_zig_zag1d;
qcoeff_ptr = &x->qcoeff[0];
if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
{ {
i = 24; i = 24;
stop = 24; stop = 24;
type = 1; type = 1;
qcoeff_ptr = &x->qcoeff[24*16]; qcoeff_ptr += 24*16;
scan = vp8_default_zig_zag1d;
eobtotal -= 16; eobtotal -= 16;
} }
else
{
scan = vp8_default_zig_zag1d;
qcoeff_ptr = &x->qcoeff[0];
}
bufend = bc->user_buffer_end; bufend = bc->user_buffer_end;
bufptr = bc->user_buffer; bufptr = bc->user_buffer;
@ -325,13 +289,15 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
coef_probs = oc->fc.coef_probs [type] [ 0 ] [0]; coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
BLOCK_LOOP: BLOCK_LOOP:
a = A[ vp8_block2context[i] ] + vp8_block2above[i]; a = A + vp8_block2above[i];
l = L[ vp8_block2context[i] ] + vp8_block2left[i]; l = L + vp8_block2left[i];
c = (INT16)(!type); c = (INT16)(!type);
VP8_COMBINEENTROPYCONTEXTS(t, *a, *l); // Dest = ((A)!=0) + ((B)!=0);
VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
Prob = coef_probs; Prob = coef_probs;
Prob += t * ENTROPY_NODES; Prob += v * ENTROPY_NODES;
DO_WHILE: DO_WHILE:
Prob += vp8_coef_bands_x[c]; Prob += vp8_coef_bands_x[c];
@ -418,9 +384,8 @@ ONE_CONTEXT_NODE_0_:
qcoeff_ptr [ scan[15] ] = (INT16) v; qcoeff_ptr [ scan[15] ] = (INT16) v;
BLOCK_FINISHED: BLOCK_FINISHED:
t = ((eobs[i] = c) != !type); // any nonzero data? *a = *l = ((eobs[i] = c) != !type); // any nonzero data?
eobtotal += c; eobtotal += c;
*a = *l = t;
qcoeff_ptr += 16; qcoeff_ptr += 16;
i++; i++;
@ -430,12 +395,11 @@ BLOCK_FINISHED:
if (i == 25) if (i == 25)
{ {
scan = vp8_default_zig_zag1d;//x->scan_order1d;
type = 0; type = 0;
i = 0; i = 0;
stop = 16; stop = 16;
coef_probs = oc->fc.coef_probs [type] [ 0 ] [0]; coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
qcoeff_ptr = &x->qcoeff[0]; qcoeff_ptr -= (24*16 + 16);
goto BLOCK_LOOP; goto BLOCK_LOOP;
} }

View File

@ -157,7 +157,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
int ithread = ((DECODETHREAD_DATA *)p_data)->ithread; int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1); VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2); MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
ENTROPY_CONTEXT mb_row_left_context[4][4]; ENTROPY_CONTEXT_PLANES mb_row_left_context;
while (1) while (1)
{ {
@ -197,12 +197,9 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
recon_uvoffset = mb_row * recon_uv_stride * 8; recon_uvoffset = mb_row * recon_uv_stride * 8;
// reset above block coeffs // reset above block coeffs
xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT]; xd->above_context = pc->above_context;
xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT]; xd->left_context = &mb_row_left_context;
xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT]; vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
xd->left_context = mb_row_left_context;
vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
xd->up_available = (mb_row != 0); xd->up_available = (mb_row != 0);
xd->mb_to_top_edge = -((mb_row * 16)) << 3; xd->mb_to_top_edge = -((mb_row * 16)) << 3;
@ -260,10 +257,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
++xd->mode_info_context; /* next mb */ ++xd->mode_info_context; /* next mb */
xd->above_context[Y1CONTEXT] += 4; xd->above_context++;
xd->above_context[UCONTEXT ] += 2;
xd->above_context[VCONTEXT ] += 2;
xd->above_context[Y2CONTEXT] ++;
//pbi->mb_row_di[ithread].current_mb_col = mb_col; //pbi->mb_row_di[ithread].current_mb_col = mb_col;
pbi->current_mb_col[mb_row] = mb_col; pbi->current_mb_col[mb_row] = mb_col;
@ -604,15 +598,12 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
if (mb_row > 0) if (mb_row > 0)
last_row_current_mb_col = &pbi->current_mb_col[mb_row -1]; last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
vpx_memset(pc->left_context, 0, sizeof(pc->left_context)); vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
recon_yoffset = mb_row * recon_y_stride * 16; recon_yoffset = mb_row * recon_y_stride * 16;
recon_uvoffset = mb_row * recon_uv_stride * 8; recon_uvoffset = mb_row * recon_uv_stride * 8;
// reset above block coeffs // reset above block coeffs
xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT]; xd->above_context = pc->above_context;
xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
xd->up_available = (mb_row != 0); xd->up_available = (mb_row != 0);
xd->mb_to_top_edge = -((mb_row * 16)) << 3; xd->mb_to_top_edge = -((mb_row * 16)) << 3;
@ -672,10 +663,7 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
++xd->mode_info_context; /* next mb */ ++xd->mode_info_context; /* next mb */
xd->above_context[Y1CONTEXT] += 4; xd->above_context++;
xd->above_context[UCONTEXT ] += 2;
xd->above_context[VCONTEXT ] += 2;
xd->above_context[Y2CONTEXT] ++;
//pbi->current_mb_col_main = mb_col; //pbi->current_mb_col_main = mb_col;
pbi->current_mb_col[mb_row] = mb_col; pbi->current_mb_col[mb_row] = mb_col;

View File

@ -348,10 +348,7 @@ void encode_mb_row(VP8_COMP *cpi,
// reset above block coeffs // reset above block coeffs
xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT]; xd->above_context = cm->above_context;
xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ];
xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ];
xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT];
xd->up_available = (mb_row != 0); xd->up_available = (mb_row != 0);
recon_yoffset = (mb_row * recon_y_stride * 16); recon_yoffset = (mb_row * recon_y_stride * 16);
@ -472,10 +469,7 @@ void encode_mb_row(VP8_COMP *cpi,
// skip to next mb // skip to next mb
xd->mode_info_context++; xd->mode_info_context++;
xd->above_context[Y1CONTEXT] += 4; xd->above_context++;
xd->above_context[UCONTEXT ] += 2;
xd->above_context[VCONTEXT ] += 2;
xd->above_context[Y2CONTEXT] ++;
cpi->current_mb_col_main = mb_col; cpi->current_mb_col_main = mb_col;
} }
@ -626,7 +620,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_context->mbmi.uv_mode = DC_PRED; xd->mode_info_context->mbmi.uv_mode = DC_PRED;
xd->left_context = cm->left_context; xd->left_context = &cm->left_context;
vp8_zero(cpi->count_mb_ref_frame_usage) vp8_zero(cpi->count_mb_ref_frame_usage)
vp8_zero(cpi->ymode_count) vp8_zero(cpi->ymode_count)
@ -634,17 +628,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
x->mvc = cm->fc.mvc; x->mvc = cm->fc.mvc;
// vp8_zero( entropy_stats) vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
{
ENTROPY_CONTEXT **p = cm->above_context;
const size_t L = cm->mb_cols;
vp8_zero_array(p [Y1CONTEXT], L * 4)
vp8_zero_array(p [ UCONTEXT], L * 2)
vp8_zero_array(p [ VCONTEXT], L * 2)
vp8_zero_array(p [Y2CONTEXT], L)
}
{ {
struct vpx_usec_timer emr_timer; struct vpx_usec_timer emr_timer;
@ -1128,7 +1112,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
extern int cnt_pm; extern int cnt_pm;
#endif #endif
extern void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x); extern void vp8_fix_contexts(MACROBLOCKD *x);
int vp8cx_encode_inter_macroblock int vp8cx_encode_inter_macroblock
( (
@ -1282,7 +1266,7 @@ int vp8cx_encode_inter_macroblock
xd->mode_info_context->mbmi.mb_skip_coeff = 1; xd->mode_info_context->mbmi.mb_skip_coeff = 1;
cpi->skip_true_count ++; cpi->skip_true_count ++;
vp8_fix_contexts(cpi, xd); vp8_fix_contexts(xd);
} }
else else
{ {

View File

@ -488,12 +488,18 @@ void vp8_optimize_b(MACROBLOCK *mb, int i, int type,
void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{ {
int b; int b;
TEMP_CONTEXT t, t2;
int type; int type;
int has_2nd_order; int has_2nd_order;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT],
x->e_mbd.left_context[Y1CONTEXT], 4);
has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
&& x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
type = has_2nd_order ? 0 : 3; type = has_2nd_order ? 0 : 3;
@ -501,24 +507,19 @@ void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
for (b = 0; b < 16; b++) for (b = 0; b < 16; b++)
{ {
vp8_optimize_b(x, b, type, vp8_optimize_b(x, b, type,
t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd); ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
} }
vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT],
x->e_mbd.left_context[UCONTEXT], 2);
vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT],
x->e_mbd.left_context[VCONTEXT], 2);
for (b = 16; b < 20; b++) for (b = 16; b < 20; b++)
{ {
vp8_optimize_b(x, b, vp8_block2type[b], vp8_optimize_b(x, b, vp8_block2type[b],
t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd); ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
} }
for (b = 20; b < 24; b++) for (b = 20; b < 24; b++)
{ {
vp8_optimize_b(x, b, vp8_block2type[b], vp8_optimize_b(x, b, vp8_block2type[b],
t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd); ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
} }
@ -565,17 +566,25 @@ static void vp8_find_mb_skip_coef(MACROBLOCK *x)
void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{ {
int b; int b;
TEMP_CONTEXT t;
int type; int type;
int has_2nd_order; int has_2nd_order;
if (!x->e_mbd.above_context[Y1CONTEXT]) ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
if (!x->e_mbd.above_context)
return; return;
if (!x->e_mbd.left_context[Y1CONTEXT]) if (!x->e_mbd.left_context)
return; return;
vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT],
x->e_mbd.left_context[Y1CONTEXT], 4); vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
&& x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
type = has_2nd_order ? 0 : 3; type = has_2nd_order ? 0 : 3;
@ -583,7 +592,7 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
for (b = 0; b < 16; b++) for (b = 0; b < 16; b++)
{ {
vp8_optimize_b(x, b, type, vp8_optimize_b(x, b, type,
t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd); ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
} }
/* /*
@ -599,33 +608,32 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{ {
int b; int b;
TEMP_CONTEXT t, t2; ENTROPY_CONTEXT_PLANES t_above, t_left;
if (!x->e_mbd.above_context[UCONTEXT]) ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
if (!x->e_mbd.above_context)
return; return;
if (!x->e_mbd.left_context[UCONTEXT]) if (!x->e_mbd.left_context)
return; return;
if (!x->e_mbd.above_context[VCONTEXT]) vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
return; vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
if (!x->e_mbd.left_context[VCONTEXT]) ta = (ENTROPY_CONTEXT *)&t_above;
return; tl = (ENTROPY_CONTEXT *)&t_left;
vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
for (b = 16; b < 20; b++) for (b = 16; b < 20; b++)
{ {
vp8_optimize_b(x, b, vp8_block2type[b], vp8_optimize_b(x, b, vp8_block2type[b],
t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd); ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
} }
for (b = 20; b < 24; b++) for (b = 20; b < 24; b++)
{ {
vp8_optimize_b(x, b, vp8_block2type[b], vp8_optimize_b(x, b, vp8_block2type[b],
t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd); ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
} }
} }

View File

@ -28,7 +28,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread; int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread;
VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1); VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1);
MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2);
ENTROPY_CONTEXT mb_row_left_context[4][4]; ENTROPY_CONTEXT_PLANES mb_row_left_context;
//printf("Started thread %d\n", ithread); //printf("Started thread %d\n", ithread);
@ -68,11 +68,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
last_row_current_mb_col = &cpi->current_mb_col_main; last_row_current_mb_col = &cpi->current_mb_col_main;
// reset above block coeffs // reset above block coeffs
xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT]; xd->above_context = cm->above_context;
xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ]; xd->left_context = &mb_row_left_context;
xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ];
xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT];
xd->left_context = mb_row_left_context;
vp8_zero(mb_row_left_context); vp8_zero(mb_row_left_context);
@ -183,10 +180,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
// skip to next mb // skip to next mb
xd->mode_info_context++; xd->mode_info_context++;
xd->above_context[Y1CONTEXT] += 4; xd->above_context++;
xd->above_context[UCONTEXT ] += 2;
xd->above_context[VCONTEXT ] += 2;
xd->above_context[Y2CONTEXT] ++;
cpi->mb_row_ei[ithread].current_mb_col = mb_col; cpi->mb_row_ei[ithread].current_mb_col = mb_col;
@ -330,11 +324,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
/*
memcpy(zd->above_context, xd->above_context, sizeof(xd->above_context));
memcpy(zd->mb_segment_tree_probs, xd->mb_segment_tree_probs, sizeof(xd->mb_segment_tree_probs));
memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
*/
for (i = 0; i < 25; i++) for (i = 0; i < 25; i++)
{ {
zd->block[i].dequant = xd->block[i].dequant; zd->block[i].dequant = xd->block[i].dequant;
@ -402,7 +391,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
mb->rddiv = cpi->RDDIV; mb->rddiv = cpi->RDDIV;
mb->rdmult = cpi->RDMULT; mb->rdmult = cpi->RDMULT;
mbd->left_context = cm->left_context; mbd->left_context = &cm->left_context;
mb->mvc = cm->fc.mvc; mb->mvc = cm->fc.mvc;
setup_mbby_copy(&mbr_ei[i].mb, x); setup_mbby_copy(&mbr_ei[i].mb, x);

View File

@ -220,13 +220,20 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int
{ {
MACROBLOCKD *const xd = &mb->e_mbd; MACROBLOCKD *const xd = &mb->e_mbd;
int i; int i;
TEMP_CONTEXT t;
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, 0); // Rd estimate for the cost of the block prediction mode int error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, 0); // Rd estimate for the cost of the block prediction mode
int distortion = 0; int distortion = 0;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
vp8_intra_prediction_down_copy(xd); vp8_intra_prediction_down_copy(xd);
vp8_setup_temp_context(&t, xd->above_context[Y1CONTEXT], xd->left_context[Y1CONTEXT], 4);
for (i = 0; i < 16; i++) for (i = 0; i < 16; i++)
{ {
@ -239,8 +246,8 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int
error += pick_intra4x4block(rtcd, error += pick_intra4x4block(rtcd,
mb, mb->block + i, xd->block + i, &best_mode, A, L, mb, mb->block + i, xd->block + i, &best_mode, A, L,
t.a + vp8_block2above[i], ta + vp8_block2above[i],
t.l + vp8_block2left[i], &r, &d); tl + vp8_block2left[i], &r, &d);
cost += r; cost += r;
distortion += d; distortion += d;

View File

@ -614,24 +614,28 @@ int vp8_rdcost_mby(MACROBLOCK *mb)
{ {
int cost = 0; int cost = 0;
int b; int b;
TEMP_CONTEXT t, t2;
int type = 0; int type = 0;
MACROBLOCKD *x = &mb->e_mbd; MACROBLOCKD *x = &mb->e_mbd;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4); vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vp8_setup_temp_context(&t2, x->above_context[Y2CONTEXT], x->left_context[Y2CONTEXT], 1); vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
if (x->mode_info_context->mbmi.mode == SPLITMV) if (x->mode_info_context->mbmi.mode == SPLITMV)
type = 3; type = 3;
for (b = 0; b < 16; b++) for (b = 0; b < 16; b++)
cost += cost_coeffs(mb, x->block + b, type, cost += cost_coeffs(mb, x->block + b, type,
t.a + vp8_block2above[b], t.l + vp8_block2left[b]); ta + vp8_block2above[b], tl + vp8_block2left[b]);
if (x->mode_info_context->mbmi.mode != SPLITMV) if (x->mode_info_context->mbmi.mode != SPLITMV)
cost += cost_coeffs(mb, x->block + 24, 1, cost += cost_coeffs(mb, x->block + 24, 1,
t2.a + vp8_block2above[24], t2.l + vp8_block2left[24]); ta + vp8_block2above[24], tl + vp8_block2left[24]);
return cost; return cost;
} }
@ -710,13 +714,20 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int
{ {
MACROBLOCKD *const xd = &mb->e_mbd; MACROBLOCKD *const xd = &mb->e_mbd;
int i; int i;
TEMP_CONTEXT t;
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int distortion = 0; int distortion = 0;
int tot_rate_y = 0; int tot_rate_y = 0;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
vp8_intra_prediction_down_copy(xd); vp8_intra_prediction_down_copy(xd);
vp8_setup_temp_context(&t, xd->above_context[Y1CONTEXT], xd->left_context[Y1CONTEXT], 4);
for (i = 0; i < 16; i++) for (i = 0; i < 16; i++)
{ {
@ -729,8 +740,8 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int
rd_pick_intra4x4block( rd_pick_intra4x4block(
cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L, cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L,
t.a + vp8_block2above[i], ta + vp8_block2above[i],
t.l + vp8_block2left[i], &r, &ry, &d); tl + vp8_block2left[i], &r, &ry, &d);
cost += r; cost += r;
distortion += d; distortion += d;
@ -792,21 +803,26 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int
static int rd_cost_mbuv(MACROBLOCK *mb) static int rd_cost_mbuv(MACROBLOCK *mb)
{ {
TEMP_CONTEXT t, t2;
int b; int b;
int cost = 0; int cost = 0;
MACROBLOCKD *x = &mb->e_mbd; MACROBLOCKD *x = &mb->e_mbd;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
vp8_setup_temp_context(&t, x->above_context[UCONTEXT], x->left_context[UCONTEXT], 2); vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vp8_setup_temp_context(&t2, x->above_context[VCONTEXT], x->left_context[VCONTEXT], 2); vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
for (b = 16; b < 20; b++) for (b = 16; b < 20; b++)
cost += cost_coeffs(mb, x->block + b, vp8_block2type[b], cost += cost_coeffs(mb, x->block + b, vp8_block2type[b],
t.a + vp8_block2above[b], t.l + vp8_block2left[b]); ta + vp8_block2above[b], tl + vp8_block2left[b]);
for (b = 20; b < 24; b++) for (b = 20; b < 24; b++)
cost += cost_coeffs(mb, x->block + b, vp8_block2type[b], cost += cost_coeffs(mb, x->block + b, vp8_block2type[b],
t2.a + vp8_block2above[b], t2.l + vp8_block2left[b]); ta + vp8_block2above[b], tl + vp8_block2left[b]);
return cost; return cost;
} }
@ -995,18 +1011,19 @@ static int labels2mode(
return cost; return cost;
} }
static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels, int which_label, TEMP_CONTEXT *t) static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
int which_label, ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl)
{ {
int cost = 0; int cost = 0;
int b; int b;
MACROBLOCKD *x = &mb->e_mbd; MACROBLOCKD *x = &mb->e_mbd;
for (b = 0; b < 16; b++) for (b = 0; b < 16; b++)
if (labels[ b] == which_label) if (labels[ b] == which_label)
cost += cost_coeffs(mb, x->block + b, 3, cost += cost_coeffs(mb, x->block + b, 3,
t->a + vp8_block2above[b], ta + vp8_block2above[b],
t->l + vp8_block2left[b]); tl + vp8_block2left[b]);
return cost; return cost;
@ -1139,9 +1156,20 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
vp8_variance_fn_ptr_t v_fn_ptr; vp8_variance_fn_ptr_t v_fn_ptr;
TEMP_CONTEXT t; ENTROPY_CONTEXT_PLANES t_above, t_left;
TEMP_CONTEXT tb; ENTROPY_CONTEXT *ta;
vp8_setup_temp_context(&t, xc->above_context[Y1CONTEXT], xc->left_context[Y1CONTEXT], 4); ENTROPY_CONTEXT *tl;
ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
ENTROPY_CONTEXT *ta_b;
ENTROPY_CONTEXT *tl_b;
vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
ta_b = (ENTROPY_CONTEXT *)&t_above_b;
tl_b = (ENTROPY_CONTEXT *)&t_left_b;
br = 0; br = 0;
bd = 0; bd = 0;
@ -1226,9 +1254,15 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
int this_rd; int this_rd;
int num00; int num00;
int labelyrate; int labelyrate;
ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
ENTROPY_CONTEXT *ta_s;
ENTROPY_CONTEXT *tl_s;
TEMP_CONTEXT ts; vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
vp8_setup_temp_context(&ts, &t.a[0], &t.l[0], 4); vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
ta_s = (ENTROPY_CONTEXT *)&t_above_s;
tl_s = (ENTROPY_CONTEXT *)&t_left_s;
if (this_mode == NEW4X4) if (this_mode == NEW4X4)
{ {
@ -1313,7 +1347,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
distortion = vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4; distortion = vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4;
labelyrate = rdcost_mbsegment_y(x, labels, i, &ts); labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
rate += labelyrate; rate += labelyrate;
this_rd = RDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb); this_rd = RDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb);
@ -1325,12 +1359,15 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
bestlabelyrate = labelyrate; bestlabelyrate = labelyrate;
mode_selected = this_mode; mode_selected = this_mode;
best_label_rd = this_rd; best_label_rd = this_rd;
vp8_setup_temp_context(&tb, &ts.a[0], &ts.l[0], 4);
vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
} }
} }
vp8_setup_temp_context(&t, &tb.a[0], &tb.l[0], 4); vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], best_ref_mv, mvcost); labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], best_ref_mv, mvcost);

View File

@ -24,7 +24,7 @@
_int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens]; _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens];
#endif #endif
void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x); void vp8_fix_contexts(MACROBLOCKD *x);
TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2]; TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
const TOKENEXTRA *vp8_dct_value_tokens_ptr; const TOKENEXTRA *vp8_dct_value_tokens_ptr;
@ -197,79 +197,11 @@ static void tokenize1st_order_b
*a = *l = pt; *a = *l = pt;
} }
#if 0
void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
{ {
//int i; ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
ENTROPY_CONTEXT **const A = x->above_context; ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
ENTROPY_CONTEXT(* const L)[4] = x->left_context;
int plane_type;
int b;
TOKENEXTRA *start = *t;
TOKENEXTRA *tp = *t;
x->mbmi.dc_diff = 1;
vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts));
if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV)
{
plane_type = 3;
}
else
{
tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type,
A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi);
plane_type = 0;
}
for (b = 0; b < 16; b++)
tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type,
A[vp8_block2context[b]] + vp8_block2above[b],
L[vp8_block2context[b]] + vp8_block2left[b], cpi);
for (b = 16; b < 24; b++)
tokenize1st_order_b(x->block + b, t, 2, x->frame_type,
A[vp8_block2context[b]] + vp8_block2above[b],
L[vp8_block2context[b]] + vp8_block2left[b], cpi);
if (cpi->common.mb_no_coeff_skip)
{
x->mbmi.mb_skip_coeff = 1;
while ((tp != *t) && x->mbmi.mb_skip_coeff)
{
x->mbmi.mb_skip_coeff = (x->mbmi.mb_skip_coeff && (tp->Token == DCT_EOB_TOKEN));
tp ++;
}
if (x->mbmi.mb_skip_coeff == 1)
{
x->mbmi.dc_diff = 0;
//redo the coutnts
vpx_memcpy(cpi->coef_counts, cpi->coef_counts_backup, sizeof(cpi->coef_counts));
*t = start;
cpi->skip_true_count++;
//skip_true_count++;
}
else
{
cpi->skip_false_count++;
//skip_false_count++;
}
}
}
#else
void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
{
//int i;
ENTROPY_CONTEXT **const A = x->above_context;
ENTROPY_CONTEXT(* const L)[4] = x->left_context;
int plane_type; int plane_type;
int b; int b;
@ -300,7 +232,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
vp8_stuff_mb(cpi, x, t) ; vp8_stuff_mb(cpi, x, t) ;
else else
{ {
vp8_fix_contexts(cpi, x); vp8_fix_contexts(x);
} }
if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
@ -354,20 +286,20 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
else else
{ {
tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type, tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type,
A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi); A + vp8_block2above[24], L + vp8_block2left[24], cpi);
plane_type = 0; plane_type = 0;
} }
for (b = 0; b < 16; b++) for (b = 0; b < 16; b++)
tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type, tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type,
A[vp8_block2context[b]] + vp8_block2above[b], A + vp8_block2above[b],
L[vp8_block2context[b]] + vp8_block2left[b], cpi); L + vp8_block2left[b], cpi);
for (b = 16; b < 24; b++) for (b = 16; b < 24; b++)
tokenize1st_order_b(x->block + b, t, 2, x->frame_type, tokenize1st_order_b(x->block + b, t, 2, x->frame_type,
A[vp8_block2context[b]] + vp8_block2above[b], A + vp8_block2above[b],
L[vp8_block2context[b]] + vp8_block2left[b], cpi); L + vp8_block2left[b], cpi);
#if 0 #if 0
@ -406,7 +338,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
#endif #endif
} }
#endif
#ifdef ENTROPY_STATS #ifdef ENTROPY_STATS
@ -581,14 +513,13 @@ void stuff1st_order_buv
void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
{ {
//int i; ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
ENTROPY_CONTEXT **const A = x->above_context; ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
ENTROPY_CONTEXT(* const L)[4] = x->left_context;
int plane_type; int plane_type;
int b; int b;
stuff2nd_order_b(x->block + 24, t, 1, x->frame_type, stuff2nd_order_b(x->block + 24, t, 1, x->frame_type,
A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi); A + vp8_block2above[24], L + vp8_block2left[24], cpi);
plane_type = 0; plane_type = 0;
@ -600,38 +531,27 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
for (b = 0; b < 16; b++) for (b = 0; b < 16; b++)
stuff1st_order_b(x->block + b, t, plane_type, x->frame_type, stuff1st_order_b(x->block + b, t, plane_type, x->frame_type,
A[vp8_block2context[b]] + vp8_block2above[b], A + vp8_block2above[b],
L[vp8_block2context[b]] + vp8_block2left[b], cpi); L + vp8_block2left[b], cpi);
for (b = 16; b < 24; b++) for (b = 16; b < 24; b++)
stuff1st_order_buv(x->block + b, t, 2, x->frame_type, stuff1st_order_buv(x->block + b, t, 2, x->frame_type,
A[vp8_block2context[b]] + vp8_block2above[b], A + vp8_block2above[b],
L[vp8_block2context[b]] + vp8_block2left[b], cpi); L + vp8_block2left[b], cpi);
} }
void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x) void vp8_fix_contexts(MACROBLOCKD *x)
{ {
x->left_context[Y1CONTEXT][0] = 0; /* Clear entropy contexts for Y2 blocks */
x->left_context[Y1CONTEXT][1] = 0;
x->left_context[Y1CONTEXT][2] = 0;
x->left_context[Y1CONTEXT][3] = 0;
x->left_context[UCONTEXT][0] = 0;
x->left_context[VCONTEXT][0] = 0;
x->left_context[UCONTEXT][1] = 0;
x->left_context[VCONTEXT][1] = 0;
x->above_context[Y1CONTEXT][0] = 0;
x->above_context[Y1CONTEXT][1] = 0;
x->above_context[Y1CONTEXT][2] = 0;
x->above_context[Y1CONTEXT][3] = 0;
x->above_context[UCONTEXT][0] = 0;
x->above_context[VCONTEXT][0] = 0;
x->above_context[UCONTEXT][1] = 0;
x->above_context[VCONTEXT][1] = 0;
if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
{ {
x->left_context[Y2CONTEXT][0] = 0; vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
x->above_context[Y2CONTEXT][0] = 0; vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
} }
else
{
vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
}
} }