Increase alignment of av_malloc() as needed by AVX ASM.
Signed-off-by: Reinhard Tartler <siretart@tauware.de>
This commit is contained in:
parent
33cbfa6fa3
commit
13dfce3d44
@ -69,21 +69,21 @@ void *av_malloc(size_t size)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* let's disallow possible ambiguous cases */
|
/* let's disallow possible ambiguous cases */
|
||||||
if(size > (INT_MAX-16) )
|
if(size > (INT_MAX-32) )
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
#if CONFIG_MEMALIGN_HACK
|
#if CONFIG_MEMALIGN_HACK
|
||||||
ptr = malloc(size+16);
|
ptr = malloc(size+32);
|
||||||
if(!ptr)
|
if(!ptr)
|
||||||
return ptr;
|
return ptr;
|
||||||
diff= ((-(long)ptr - 1)&15) + 1;
|
diff= ((-(long)ptr - 1)&31) + 1;
|
||||||
ptr = (char*)ptr + diff;
|
ptr = (char*)ptr + diff;
|
||||||
((char*)ptr)[-1]= diff;
|
((char*)ptr)[-1]= diff;
|
||||||
#elif HAVE_POSIX_MEMALIGN
|
#elif HAVE_POSIX_MEMALIGN
|
||||||
if (posix_memalign(&ptr,16,size))
|
if (posix_memalign(&ptr,32,size))
|
||||||
ptr = NULL;
|
ptr = NULL;
|
||||||
#elif HAVE_MEMALIGN
|
#elif HAVE_MEMALIGN
|
||||||
ptr = memalign(16,size);
|
ptr = memalign(32,size);
|
||||||
/* Why 64?
|
/* Why 64?
|
||||||
Indeed, we should align it:
|
Indeed, we should align it:
|
||||||
on 4 for 386
|
on 4 for 386
|
||||||
@ -93,10 +93,8 @@ void *av_malloc(size_t size)
|
|||||||
Because L1 and L2 caches are aligned on those values.
|
Because L1 and L2 caches are aligned on those values.
|
||||||
But I don't want to code such logic here!
|
But I don't want to code such logic here!
|
||||||
*/
|
*/
|
||||||
/* Why 16?
|
/* Why 32?
|
||||||
Because some CPUs need alignment, for example SSE2 on P4, & most RISC CPUs
|
For AVX ASM. SSE / NEON needs only 16.
|
||||||
it will just trigger an exception and the unaligned load will be done in the
|
|
||||||
exception handler or it will just segfault (SSE2 on P4).
|
|
||||||
Why not larger? Because I did not see a difference in benchmarks ...
|
Why not larger? Because I did not see a difference in benchmarks ...
|
||||||
*/
|
*/
|
||||||
/* benchmarks with P3
|
/* benchmarks with P3
|
||||||
|
Loading…
x
Reference in New Issue
Block a user