Reorder ifs in chroma hl_decode_mb to avoid a duplicate transform_bypass
check. 14 CPU cycles speedup on Pentium Dual.
Originally committed as revision 16221 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
6456d6d87c
commit
96465b90a1
@ -2561,25 +2561,28 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
|
if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
|
||||||
uint8_t *dest[2] = {dest_cb, dest_cr};
|
uint8_t *dest[2] = {dest_cb, dest_cr};
|
||||||
if(transform_bypass){
|
if(transform_bypass){
|
||||||
idct_add = idct_dc_add = s->dsp.add_pixels4;
|
idct_add = s->dsp.add_pixels4;
|
||||||
|
if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
|
||||||
|
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
|
||||||
|
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
|
||||||
|
}else{
|
||||||
|
for(i=16; i<16+8; i++){
|
||||||
|
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
|
||||||
|
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
|
||||||
|
}
|
||||||
|
}
|
||||||
}else{
|
}else{
|
||||||
idct_add = s->dsp.h264_idct_add;
|
idct_add = s->dsp.h264_idct_add;
|
||||||
idct_dc_add = s->dsp.h264_idct_dc_add;
|
idct_dc_add = s->dsp.h264_idct_dc_add;
|
||||||
chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
|
chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
|
||||||
chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
|
chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
|
||||||
}
|
|
||||||
if(is_h264){
|
if(is_h264){
|
||||||
if(transform_bypass && IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
|
|
||||||
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
|
|
||||||
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
|
|
||||||
}else{
|
|
||||||
for(i=16; i<16+8; i++){
|
for(i=16; i<16+8; i++){
|
||||||
if(h->non_zero_count_cache[ scan8[i] ])
|
if(h->non_zero_count_cache[ scan8[i] ])
|
||||||
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
|
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
|
||||||
else if(h->mb[i*16])
|
else if(h->mb[i*16])
|
||||||
idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
|
idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}else{
|
}else{
|
||||||
for(i=16; i<16+8; i++){
|
for(i=16; i<16+8; i++){
|
||||||
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
|
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
|
||||||
@ -2590,6 +2593,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if(h->deblocking_filter) {
|
if(h->deblocking_filter) {
|
||||||
backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
|
backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
|
||||||
fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
|
fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
|
||||||
|
Loading…
x
Reference in New Issue
Block a user