factorize get_cabac asm (0.5% slower but its much cleaner)
Originally committed as revision 6740 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
1e4051aaa0
commit
13404b2e98
@ -452,71 +452,72 @@ static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state
|
||||
);
|
||||
bit&=1;
|
||||
#else /* BRANCHLESS_CABAC_DECODER */
|
||||
asm volatile(
|
||||
"movzbl (%1), %0 \n\t"
|
||||
"movl "RANGE "(%2), %%ebx \n\t"
|
||||
"movl "RANGE "(%2), %%edx \n\t"
|
||||
"andl $0xC0, %%ebx \n\t"
|
||||
"movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
|
||||
"movl "LOW "(%2), %%ebx \n\t"
|
||||
//eax:state ebx:low, edx:range, esi:RangeLPS
|
||||
"subl %%esi, %%edx \n\t"
|
||||
|
||||
|
||||
#if (defined CMOV_IS_FAST && __CPU__ >= 686)
|
||||
"movl %%edx, %%ecx \n\t"
|
||||
"shl $17, %%edx \n\t"
|
||||
"cmpl %%ebx, %%edx \n\t"
|
||||
"cmova %%ecx, %%esi \n\t"
|
||||
"sbbl %%ecx, %%ecx \n\t"
|
||||
"andl %%ecx, %%edx \n\t"
|
||||
"subl %%edx, %%ebx \n\t"
|
||||
"xorl %%ecx, %0 \n\t"
|
||||
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
|
||||
"mov "tmp" , %%ecx \n\t"\
|
||||
"shl $17 , "tmp" \n\t"\
|
||||
"cmp "low" , "tmp" \n\t"\
|
||||
"cmova %%ecx , "range" \n\t"\
|
||||
"sbb %%ecx , %%ecx \n\t"\
|
||||
"and %%ecx , "tmp" \n\t"\
|
||||
"sub "tmp" , "low" \n\t"\
|
||||
"xor %%ecx , "ret" \n\t"
|
||||
#else /* CMOV_IS_FAST */
|
||||
"movl %%edx, %%ecx \n\t"
|
||||
"shl $17, %%edx \n\t"
|
||||
"subl %%ebx, %%edx \n\t"
|
||||
"sarl $31, %%edx \n\t" //lps_mask
|
||||
"subl %%ecx, %%esi \n\t" //RangeLPS - range
|
||||
"andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
|
||||
"addl %%ecx, %%esi \n\t" //new range
|
||||
"shl $17, %%ecx \n\t"
|
||||
"andl %%edx, %%ecx \n\t"
|
||||
"subl %%ecx, %%ebx \n\t"
|
||||
"xorl %%edx, %0 \n\t"
|
||||
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
|
||||
"mov "tmp" , %%ecx \n\t"\
|
||||
"shl $17 , "tmp" \n\t"\
|
||||
"sub "low" , "tmp" \n\t"\
|
||||
"sar $31 , "tmp" \n\t" /*lps_mask*/\
|
||||
"sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
|
||||
"and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
|
||||
"add %%ecx , "range" \n\t" /*new range*/\
|
||||
"shl $17 , %%ecx \n\t"\
|
||||
"and "tmp" , %%ecx \n\t"\
|
||||
"sub %%ecx , "low" \n\t"\
|
||||
"xor "tmp" , "ret" \n\t"
|
||||
#endif /* CMOV_IS_FAST */
|
||||
|
||||
//eax:state ebx:low edx:mask esi:range
|
||||
|
||||
//eax:bit ebx:low esi:range
|
||||
#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
|
||||
"movzbl "statep" , "ret" \n\t"\
|
||||
"mov "range" , "tmp" \n\t"\
|
||||
"and $0xC0 , "range" \n\t"\
|
||||
"movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
|
||||
"sub "range" , "tmp" \n\t"\
|
||||
BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
|
||||
"movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
|
||||
"shl %%cl , "range" \n\t"\
|
||||
"movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
|
||||
"mov "tmpbyte" , "statep" \n\t"\
|
||||
"shl %%cl , "low" \n\t"\
|
||||
"test "lowword" , "lowword" \n\t"\
|
||||
" jnz 1f \n\t"\
|
||||
"mov "BYTE"("cabac"), %%ecx \n\t"\
|
||||
"movzwl (%%ecx) , "tmp" \n\t"\
|
||||
"bswap "tmp" \n\t"\
|
||||
"shr $15 , "tmp" \n\t"\
|
||||
"sub $0xFFFF , "tmp" \n\t"\
|
||||
"add $2 , %%ecx \n\t"\
|
||||
"mov %%ecx , "BYTE "("cabac") \n\t"\
|
||||
"lea -1("low") , %%ecx \n\t"\
|
||||
"xor "low" , %%ecx \n\t"\
|
||||
"shr $15 , %%ecx \n\t"\
|
||||
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
|
||||
"neg %%ecx \n\t"\
|
||||
"add $7 , %%ecx \n\t"\
|
||||
"shl %%cl , "tmp" \n\t"\
|
||||
"add "tmp" , "low" \n\t"\
|
||||
"1: \n\t"
|
||||
|
||||
"movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
|
||||
"shll %%cl, %%esi \n\t"
|
||||
"movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t"
|
||||
"movb %%dl, (%1) \n\t"
|
||||
asm volatile(
|
||||
"movl "RANGE "(%2), %%esi \n\t"
|
||||
"movl "LOW "(%2), %%ebx \n\t"
|
||||
BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
|
||||
"movl %%esi, "RANGE "(%2) \n\t"
|
||||
"shll %%cl, %%ebx \n\t"
|
||||
"movl %%ebx, "LOW "(%2) \n\t"
|
||||
"test %%bx, %%bx \n\t"
|
||||
" jnz 1f \n\t"
|
||||
|
||||
"movl "BYTE "(%2), %%ecx \n\t"
|
||||
"movzwl (%%ecx), %%esi \n\t"
|
||||
"bswap %%esi \n\t"
|
||||
"shrl $15, %%esi \n\t"
|
||||
"subl $0xFFFF, %%esi \n\t"
|
||||
"addl $2, %%ecx \n\t"
|
||||
"movl %%ecx, "BYTE "(%2) \n\t"
|
||||
|
||||
"leal -1(%%ebx), %%ecx \n\t"
|
||||
"xorl %%ebx, %%ecx \n\t"
|
||||
"shrl $15, %%ecx \n\t"
|
||||
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
|
||||
"neg %%ecx \n\t"
|
||||
"add $7, %%ecx \n\t"
|
||||
|
||||
"shll %%cl , %%esi \n\t"
|
||||
"addl %%esi, %%ebx \n\t"
|
||||
"movl %%ebx, "LOW "(%2) \n\t"
|
||||
"1: \n\t"
|
||||
:"=&a"(bit)
|
||||
:"r"(state), "r"(c)
|
||||
: "%ecx", "%ebx", "%edx", "%esi", "memory"
|
||||
@ -683,62 +684,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *sign
|
||||
|
||||
"2: \n\t"
|
||||
|
||||
"movzbl (%1), %0 \n\t"
|
||||
"movl %%esi, %%edx \n\t"
|
||||
"andl $0xC0, %%esi \n\t"
|
||||
"movzbl "MANGLE(ff_h264_lps_range)"(%0, %%esi, 2), %%esi\n\t"
|
||||
/*eax:state ebx:low, edx:range, esi:RangeLPS*/
|
||||
"subl %%esi, %%edx \n\t"
|
||||
|
||||
#if (defined CMOV_IS_FAST && __CPU__ >= 686)
|
||||
"movl %%edx, %%ecx \n\t"
|
||||
"shl $17, %%edx \n\t"
|
||||
"cmpl %%ebx, %%edx \n\t"
|
||||
"cmova %%ecx, %%esi \n\t"
|
||||
"sbbl %%ecx, %%ecx \n\t"
|
||||
"andl %%ecx, %%edx \n\t"
|
||||
"subl %%edx, %%ebx \n\t"
|
||||
"xorl %%ecx, %0 \n\t"
|
||||
#else /* CMOV_IS_FAST */
|
||||
"movl %%edx, %%ecx \n\t"
|
||||
"shl $17, %%edx \n\t"
|
||||
"subl %%ebx, %%edx \n\t"
|
||||
"sarl $31, %%edx \n\t" //lps_mask
|
||||
"subl %%ecx, %%esi \n\t" //RangeLPS - range
|
||||
"andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
|
||||
"addl %%ecx, %%esi \n\t" //new range
|
||||
"shl $17, %%ecx \n\t"
|
||||
"andl %%edx, %%ecx \n\t"
|
||||
"subl %%ecx, %%ebx \n\t"
|
||||
"xorl %%edx, %0 \n\t"
|
||||
#endif /* CMOV_IS_FAST */
|
||||
|
||||
"movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
|
||||
"shll %%cl, %%esi \n\t"
|
||||
"movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t"
|
||||
"movb %%dl, (%1) \n\t"
|
||||
"shll %%cl, %%ebx \n\t"
|
||||
"test %%bx, %%bx \n\t"
|
||||
" jnz 1f \n\t"
|
||||
|
||||
"movl "BYTE "(%3), %%ecx \n\t"
|
||||
"movzwl (%%ecx), %%edx \n\t"
|
||||
"bswap %%edx \n\t"
|
||||
"shrl $15, %%edx \n\t"
|
||||
"subl $0xFFFF, %%edx \n\t"
|
||||
"addl $2, %%ecx \n\t"
|
||||
"movl %%ecx, "BYTE "(%3) \n\t"
|
||||
|
||||
"leal -1(%%ebx), %%ecx \n\t"
|
||||
"xorl %%ebx, %%ecx \n\t"
|
||||
"shrl $15, %%ecx \n\t"
|
||||
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
|
||||
"neg %%ecx \n\t"
|
||||
"add $7, %%ecx \n\t"
|
||||
|
||||
"shll %%cl , %%edx \n\t"
|
||||
"addl %%edx, %%ebx \n\t"
|
||||
"1: \n\t"
|
||||
BRANCHLESS_GET_CABAC("%0", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
|
||||
|
||||
"test $1, %0 \n\t"
|
||||
" jz 3f \n\t"
|
||||
@ -750,62 +696,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *sign
|
||||
"addl $4, %%eax \n\t"
|
||||
"movl %%eax, %2 \n\t"
|
||||
|
||||
"movzbl 61(%1), %0 \n\t"
|
||||
"movl %%esi, %%edx \n\t"
|
||||
"andl $0xC0, %%esi \n\t"
|
||||
"movzbl "MANGLE(ff_h264_lps_range)"(%0, %%esi, 2), %%esi\n\t"
|
||||
/*eax:state ebx:low, edx:range, esi:RangeLPS*/
|
||||
"subl %%esi, %%edx \n\t"
|
||||
|
||||
#if (defined CMOV_IS_FAST && __CPU__ >= 686)
|
||||
"movl %%edx, %%ecx \n\t"
|
||||
"shl $17, %%edx \n\t"
|
||||
"cmpl %%ebx, %%edx \n\t"
|
||||
"cmova %%ecx, %%esi \n\t"
|
||||
"sbbl %%ecx, %%ecx \n\t"
|
||||
"andl %%ecx, %%edx \n\t"
|
||||
"subl %%edx, %%ebx \n\t"
|
||||
"xorl %%ecx, %0 \n\t"
|
||||
#else /* CMOV_IS_FAST */
|
||||
"movl %%edx, %%ecx \n\t"
|
||||
"shl $17, %%edx \n\t"
|
||||
"subl %%ebx, %%edx \n\t"
|
||||
"sarl $31, %%edx \n\t" //lps_mask
|
||||
"subl %%ecx, %%esi \n\t" //RangeLPS - range
|
||||
"andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
|
||||
"addl %%ecx, %%esi \n\t" //new range
|
||||
"shl $17, %%ecx \n\t"
|
||||
"andl %%edx, %%ecx \n\t"
|
||||
"subl %%ecx, %%ebx \n\t"
|
||||
"xorl %%edx, %0 \n\t"
|
||||
#endif /* CMOV_IS_FAST */
|
||||
|
||||
"movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
|
||||
"shll %%cl, %%esi \n\t"
|
||||
"movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t"
|
||||
"movb %%dl, 61(%1) \n\t"
|
||||
"shll %%cl, %%ebx \n\t"
|
||||
"test %%bx, %%bx \n\t"
|
||||
" jnz 1f \n\t"
|
||||
|
||||
"movl "BYTE "(%3), %%ecx \n\t"
|
||||
"movzwl (%%ecx), %%edx \n\t"
|
||||
"bswap %%edx \n\t"
|
||||
"shrl $15, %%edx \n\t"
|
||||
"subl $0xFFFF, %%edx \n\t"
|
||||
"addl $2, %%ecx \n\t"
|
||||
"movl %%ecx, "BYTE "(%3) \n\t"
|
||||
|
||||
"leal -1(%%ebx), %%ecx \n\t"
|
||||
"xorl %%ebx, %%ecx \n\t"
|
||||
"shrl $15, %%ecx \n\t"
|
||||
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
|
||||
"neg %%ecx \n\t"
|
||||
"add $7, %%ecx \n\t"
|
||||
|
||||
"shll %%cl , %%edx \n\t"
|
||||
"addl %%edx, %%ebx \n\t"
|
||||
"1: \n\t"
|
||||
BRANCHLESS_GET_CABAC("%0", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
|
||||
|
||||
"test $1, %%eax \n\t"
|
||||
" jnz 4f \n\t"
|
||||
|
Loading…
x
Reference in New Issue
Block a user