Replace a few and/sub/... instructions with cmov
This is faster on P3, should be faster on AMD, and should be slower on P4, so it's disabled by default (benchmarks welcome so we know when to enable it). Originally committed as revision 6615 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
c6a9271c56
commit
ab0151d163
@ -459,6 +459,14 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
|
||||
"movl "LOW "(%2), %%ebx \n\t"
|
||||
//eax:state ebx:low, edx:range, esi:RangeLPS
|
||||
"subl %%esi, %%edx \n\t"
|
||||
#ifdef CMOV_IS_FAST //FIXME actually define this somewhere
|
||||
"cmpl %%ebx, %%edx \n\t"
|
||||
"cmova %%edx, %%esi \n\t"
|
||||
"sbbl %%ecx, %%ecx \n\t"
|
||||
"andl %%ecx, %%edx \n\t"
|
||||
"subl %%edx, %%ebx \n\t"
|
||||
"xorl %%ecx, %%eax \n\t"
|
||||
#else
|
||||
"movl %%edx, %%ecx \n\t"
|
||||
"subl %%ebx, %%edx \n\t"
|
||||
"sarl $31, %%edx \n\t" //lps_mask
|
||||
@ -467,9 +475,10 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
|
||||
"addl %%ecx, %%esi \n\t" //new range
|
||||
"andl %%edx, %%ecx \n\t"
|
||||
"subl %%ecx, %%ebx \n\t"
|
||||
"xorl %%edx, %%eax \n\t"
|
||||
#endif
|
||||
|
||||
//eax:state ebx:low edx:mask esi:range
|
||||
"xorl %%edx, %%eax \n\t"
|
||||
"movzbl "MPS_STATE"(%2, %%eax), %%ecx \n\t"
|
||||
"movb %%cl, (%1) \n\t"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user