13% faster decode_residual (cavlc).
patch by diane_cartman at gmx dot de. Originally committed as revision 4617 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
4e159595b5
commit
ca3b0d2719
@ -4437,8 +4437,8 @@ static inline int get_dct8x8_allowed(H264Context *h){
|
|||||||
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){
|
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){
|
||||||
MpegEncContext * const s = &h->s;
|
MpegEncContext * const s = &h->s;
|
||||||
static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
|
static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
|
||||||
int level[16], run[16];
|
int level[16];
|
||||||
int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
|
int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
|
||||||
|
|
||||||
//FIXME put trailing_ones into the context
|
//FIXME put trailing_ones into the context
|
||||||
|
|
||||||
@ -4471,12 +4471,12 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
|
|||||||
level[i]= 1 - 2*get_bits1(gb);
|
level[i]= 1 - 2*get_bits1(gb);
|
||||||
}
|
}
|
||||||
|
|
||||||
suffix_length= total_coeff > 10 && trailing_ones < 3;
|
if(i<total_coeff) {
|
||||||
|
|
||||||
for(; i<total_coeff; i++){
|
|
||||||
const int prefix= get_level_prefix(gb);
|
|
||||||
int level_code, mask;
|
int level_code, mask;
|
||||||
|
int suffix_length = total_coeff > 10 && trailing_ones < 3;
|
||||||
|
int prefix= get_level_prefix(gb);
|
||||||
|
|
||||||
|
//first coefficient has suffix_length equal to 0 or 1
|
||||||
if(prefix<14){ //FIXME try to build a large unified VLC table for all this
|
if(prefix<14){ //FIXME try to build a large unified VLC table for all this
|
||||||
if(suffix_length)
|
if(suffix_length)
|
||||||
level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
|
level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
|
||||||
@ -4495,20 +4495,32 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
|
if(trailing_ones < 3) level_code += 2;
|
||||||
|
|
||||||
|
suffix_length = 1;
|
||||||
|
if(level_code > 5)
|
||||||
|
suffix_length++;
|
||||||
mask= -(level_code&1);
|
mask= -(level_code&1);
|
||||||
level[i]= (((2+level_code)>>1) ^ mask) - mask;
|
level[i]= (((2+level_code)>>1) ^ mask) - mask;
|
||||||
|
i++;
|
||||||
|
|
||||||
if(suffix_length==0) suffix_length=1; //FIXME split first iteration
|
//remaining coefficients have suffix_length > 0
|
||||||
|
for(;i<total_coeff;i++) {
|
||||||
#if 1
|
static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
|
||||||
if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
|
prefix = get_level_prefix(gb);
|
||||||
#else
|
if(prefix<15){
|
||||||
if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
|
level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
|
||||||
/* ? == prefix > 2 or sth */
|
}else if(prefix==15){
|
||||||
#endif
|
level_code = (prefix<<suffix_length) + get_bits(gb, 12);
|
||||||
tprintf("level: %d suffix_length:%d\n", level[i], suffix_length);
|
}else{
|
||||||
|
av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
mask= -(level_code&1);
|
||||||
|
level[i]= (((2+level_code)>>1) ^ mask) - mask;
|
||||||
|
if(level_code > suffix_limit[suffix_length])
|
||||||
|
suffix_length++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(total_coeff == max_coeff)
|
if(total_coeff == max_coeff)
|
||||||
@ -4520,15 +4532,41 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
|
|||||||
zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
|
zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(i=0; i<total_coeff-1; i++){
|
coeff_num = zeros_left + total_coeff - 1;
|
||||||
|
j = scantable[coeff_num];
|
||||||
|
if(n > 24){
|
||||||
|
block[j] = level[0];
|
||||||
|
for(i=1;i<total_coeff;i++) {
|
||||||
if(zeros_left <= 0)
|
if(zeros_left <= 0)
|
||||||
break;
|
run_before = 0;
|
||||||
else if(zeros_left < 7){
|
else if(zeros_left < 7){
|
||||||
run[i]= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
|
run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
|
||||||
}else{
|
}else{
|
||||||
run[i]= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
|
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
|
||||||
|
}
|
||||||
|
zeros_left -= run_before;
|
||||||
|
coeff_num -= 1 + run_before;
|
||||||
|
j= scantable[ coeff_num ];
|
||||||
|
|
||||||
|
block[j]= level[i];
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
block[j] = level[0] * qmul[j];
|
||||||
|
for(i=1;i<total_coeff;i++) {
|
||||||
|
if(zeros_left <= 0)
|
||||||
|
run_before = 0;
|
||||||
|
else if(zeros_left < 7){
|
||||||
|
run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
|
||||||
|
}else{
|
||||||
|
run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
|
||||||
|
}
|
||||||
|
zeros_left -= run_before;
|
||||||
|
coeff_num -= 1 + run_before;
|
||||||
|
j= scantable[ coeff_num ];
|
||||||
|
|
||||||
|
block[j]= level[i] * qmul[j];
|
||||||
|
// printf("%d %d ", block[j], qmul[j]);
|
||||||
}
|
}
|
||||||
zeros_left -= run[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(zeros_left<0){
|
if(zeros_left<0){
|
||||||
@ -4536,33 +4574,6 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(; i<total_coeff-1; i++){
|
|
||||||
run[i]= 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
run[i]= zeros_left;
|
|
||||||
|
|
||||||
coeff_num=-1;
|
|
||||||
if(n > 24){
|
|
||||||
for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
|
|
||||||
int j;
|
|
||||||
|
|
||||||
coeff_num += run[i] + 1; //FIXME add 1 earlier ?
|
|
||||||
j= scantable[ coeff_num ];
|
|
||||||
|
|
||||||
block[j]= level[i];
|
|
||||||
}
|
|
||||||
}else{
|
|
||||||
for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
|
|
||||||
int j;
|
|
||||||
|
|
||||||
coeff_num += run[i] + 1; //FIXME add 1 earlier ?
|
|
||||||
j= scantable[ coeff_num ];
|
|
||||||
|
|
||||||
block[j]= level[i] * qmul[j];
|
|
||||||
// printf("%d %d ", block[j], qmul[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user