Compare commits

...

9 Commits

Author SHA1 Message Date
Dima Zavin
110044b131 libc: kernel: Update msm_kgsl.h header
Change-Id: I8b22bcb4816fca5e0b92fee4b2ccb0ef135be5a9
Signed-off-by: Dima Zavin <dima@android.com>
2009-11-13 02:29:17 -08:00
Mathias Agopian
199f9d9238 Improve memcpy performance from 290 MiB/s to 340 MiB/s (17% improvement)
use 64 bytes cache lines, reduce the main loop to 64-bytes instead of
128 bytes and adjust the prefetch distance to the optimal value.
2009-10-28 03:17:02 -07:00
Android (Google) Code Review
763ac28357 Merge changes Ibcba4b4f,I9af341e1 into eclair
* changes:
  bionic/linker: make the buddy allocator compute max_order on its own
  bionic/linker: change the buddy allocator to take a handle to the managed area
2009-10-23 21:27:57 -04:00
Andy McFadden
96bbbe2177 Wrap ARM abort() to improve stack trace.
The code generated for Thumb and Thumb2 targets has different handling
for abort().  Because abort() is "noreturn", it doesn't need to preserve
the callee-save registers.  The Thumb2 version trashes LR and makes it
impossible to figure out who called abort().

This inserts a trivial stub function; net effect is stack traces are
reasonable after an abort().

For bug 2191452.

Eclair branch Dr. No approved by: hiroshi
2009-10-21 10:41:12 -07:00
Iliyan Malchev
bb9eedeff4 bionic/linker: make the buddy allocator compute max_order on its own
Signed-off-by: Iliyan Malchev <malchev@google.com>
2009-10-19 18:10:35 -07:00
Iliyan Malchev
af7315acf6 bionic/linker: change the buddy allocator to take a handle to the managed area
-- rename struct ba_info to struct ba
-- move the static ba descriptor from ba.c to linker.c and rename it ba_prelink
-- ba_init, ba_allocate, ba_free, ba_start_addr, and ba_len all take a pointer
   to struct ba

Signed-off-by: Iliyan Malchev <malchev@google.com>
2009-10-19 18:10:35 -07:00
Mathias Agopian
7e7d6c48a0 use local symbols in memset so it doesn't screw up profiling
2009-10-19 16:34:38 -07:00
Android (Google) Code Review
ff7b46b87c Merge change I3c998761 into eclair
* changes:
  Allow the dynamic linker to relocate references to thumb symbols in NDK libraries.
2009-10-16 15:27:36 -04:00
David 'Digit' Turner
3c99876116 Allow the dynamic linker to relocate references to thumb symbols in NDK libraries.
The lowest bit of the symbol's value should be ignored when looking at UNDEF symbols
for correctness. It is used as an ARM/Thumb flag by the linker.
2009-10-13 16:55:18 -07:00
7 changed files with 133 additions and 91 deletions

View File

@@ -37,8 +37,9 @@
.type memcpy, %function
.align 4
/* a prefetch distance of 32*4 works best experimentally */
#define PREFETCH_DISTANCE (32*4)
/* a prefetch distance of 4 cache-lines works best experimentally */
#define CACHE_LINE_SIZE 64
#define PREFETCH_DISTANCE (CACHE_LINE_SIZE*4)
memcpy:
.fnstart
@@ -46,8 +47,8 @@ memcpy:
stmfd sp!, {r0, lr}
/* start preloading as early as possible */
pld [r1, #0]
pld [r1, #32]
pld [r1, #(CACHE_LINE_SIZE*0)]
pld [r1, #(CACHE_LINE_SIZE*1)]
/* do we have at least 16-bytes to copy (needed for alignment below) */
cmp r2, #16
@@ -79,13 +80,11 @@ memcpy:
2:
0: /* preload immediately the next cache line, which we may need */
pld [r1, #(32*0)]
pld [r1, #(32*1)]
pld [r1, #(32*2)]
pld [r1, #(32*3)]
pld [r1, #(CACHE_LINE_SIZE*0)]
pld [r1, #(CACHE_LINE_SIZE*1)]
/* make sure we have at least 128 bytes to copy */
subs r2, r2, #128
/* make sure we have at least 64 bytes to copy */
subs r2, r2, #64
blo 2f
/* preload all the cache lines we need.
@@ -94,29 +93,21 @@ memcpy:
* avoid the goofy code below. In practice this doesn't seem to make
* a big difference.
*/
pld [r1, #(PREFETCH_DISTANCE + 32*0)]
pld [r1, #(PREFETCH_DISTANCE + 32*1)]
pld [r1, #(PREFETCH_DISTANCE + 32*2)]
pld [r1, #(PREFETCH_DISTANCE + 32*3)]
pld [r1, #(CACHE_LINE_SIZE*2)]
pld [r1, #(CACHE_LINE_SIZE*3)]
pld [r1, #(PREFETCH_DISTANCE)]
1: /* The main loop copies 128 bytes at a time */
1: /* The main loop copies 64 bytes at a time */
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
vld1.8 {d16 - d19}, [r1]!
vld1.8 {d20 - d23}, [r1]!
pld [r1, #(PREFETCH_DISTANCE + 32*0)]
pld [r1, #(PREFETCH_DISTANCE + 32*1)]
pld [r1, #(PREFETCH_DISTANCE + 32*2)]
pld [r1, #(PREFETCH_DISTANCE + 32*3)]
subs r2, r2, #128
pld [r1, #(PREFETCH_DISTANCE)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0, :128]!
vst1.8 {d4 - d7}, [r0, :128]!
vst1.8 {d16 - d19}, [r0, :128]!
vst1.8 {d20 - d23}, [r0, :128]!
bhs 1b
2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
add r2, r2, #128
add r2, r2, #64
subs r2, r2, #32
blo 4f

View File

@@ -80,7 +80,7 @@ memset:
rsb r3, r0, #0
ands r3, r3, #0x1C
beq aligned32
beq 3f
cmp r3, r2
andhi r3, r2, #0x1C
sub r2, r2, r3
@@ -93,7 +93,7 @@ memset:
movs r3, r3, lsl #2
strcs r1, [r0], #4
aligned32:
3:
subs r2, r2, #32
mov r3, r1
bmi 2f

View File

@@ -188,5 +188,12 @@ struct kgsl_sharedmem_from_vmalloc {
#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE _IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free)
/* Argument structure passed with IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET. */
struct kgsl_drawctxt_set_bin_base_offset {
unsigned int drawctxt_id; /* NOTE(review): presumably identifies the target draw context - confirm against the kernel driver */
unsigned int offset; /* NOTE(review): presumably the bin base offset to set - units not shown here */
};
/* ioctl number 0x25 in the KGSL_IOC_TYPE space, declared with _IOW */
#define IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET _IOW(KGSL_IOC_TYPE, 0x25, struct kgsl_drawctxt_set_bin_base_offset)
#endif

View File

@@ -39,8 +39,13 @@
/*
 * printf-style debug logging helper for this file: forwards the format
 * string and its arguments to __libc_android_log_print() at
 * ANDROID_LOG_DEBUG priority under the "libc-abort" tag.
 * The "##" before __VA_ARGS__ lets the macro be used with no extra arguments.
 */
#define debug_log(format, ...) \
__libc_android_log_print(ANDROID_LOG_DEBUG, "libc-abort", (format), ##__VA_ARGS__ )
#ifdef __arm__
void
__libc_android_abort(void)
#else
void
abort(void)
#endif
{
struct atexit *p = __atexit;
static int cleanup_called = 0;
@@ -97,3 +102,29 @@ abort(void)
(void)kill(getpid(), SIGABRT);
_exit(1);
}
#ifdef __arm__
/*
* abort() does not return, which gcc interprets to mean that it doesn't
* need to preserve any of the callee-save registers. Unfortunately this
* includes the link register, so if LR is used there is no way to determine
* which function called abort().
*
* We work around this by inserting a trivial stub that doesn't alter
* any of the "interesting" registers and thus doesn't need to save them.
* We can't just call __libc_android_abort from C because gcc uses "bl"
* without first saving LR, so we use an asm statement. This also has
* the side-effect of replacing abort() with __libc_android_abort() in
* the stack trace.
*
* Ideally __libc_android_abort would be static, but I haven't figured out
* how to tell gcc to call a static function from an asm statement.
*/
void
abort(void)
{
/* plain branch ("b", not "bl"): control moves to the real implementation
 * while LR still holds the address of abort()'s caller */
asm ("b __libc_android_abort");
/* not expected to be reached: the branch above does not come back here */
_exit(1); /* suppress gcc noreturn warnings */
}
#endif

View File

@@ -30,65 +30,41 @@
#include "linker_debug.h"
#include "ba.h"
struct ba_bits {
unsigned allocated:1; /* 1 if allocated, 0 if free */
unsigned order:7; /* size of the region in ba space */
};
struct ba_info {
/* start address of the ba space */
unsigned long base;
/* total size of the ba space */
unsigned long size;
/* number of entries in the ba space */
int num_entries;
/* the bitmap for the region indicating which entries are allocated
* and which are free */
struct ba_bits *bitmap;
};
#undef min
#define min(a,b) ((a)<(b)?(a):(b))
#define BA_MIN_ALLOC LIBINC
#define BA_MAX_ORDER 128
#define BA_START LIBBASE
#define BA_SIZE (LIBLAST - LIBBASE)
#define BA_IS_FREE(index) (!(ba.bitmap[index].allocated))
#define BA_ORDER(index) ba.bitmap[index].order
#define BA_IS_FREE(index) (!(ba->bitmap[index].allocated))
#define BA_ORDER(index) ba->bitmap[index].order
#define BA_BUDDY_INDEX(index) ((index) ^ (1 << BA_ORDER(index)))
#define BA_NEXT_INDEX(index) ((index) + (1 << BA_ORDER(index)))
#define BA_OFFSET(index) ((index) * BA_MIN_ALLOC)
#define BA_START_ADDR(index) (BA_OFFSET(index) + ba.base)
#define BA_LEN(index) ((1 << BA_ORDER(index)) * BA_MIN_ALLOC)
#define BA_OFFSET(index) ((index) * ba->min_alloc)
#define BA_START_ADDR(index) (BA_OFFSET(index) + ba->base)
#define BA_LEN(index) ((1 << BA_ORDER(index)) * ba->min_alloc)
static struct ba_bits ba_bitmap[BA_SIZE / BA_MIN_ALLOC];
static unsigned long ba_order(struct ba *ba, unsigned long len);
static struct ba_info ba = {
.base = BA_START,
.size = BA_SIZE,
.bitmap = ba_bitmap,
.num_entries = sizeof(ba_bitmap)/sizeof(ba_bitmap[0]),
};
void ba_init(void)
void ba_init(struct ba *ba)
{
int i, index = 0;
for (i = sizeof(ba.num_entries) * 8 - 1; i >= 0; i--) {
if (ba.num_entries & 1<<i) {
unsigned long max_order = ba_order(ba, ba->size);
if (ba->max_order == 0 || ba->max_order > max_order)
ba->max_order = max_order;
for (i = sizeof(ba->num_entries) * 8 - 1; i >= 0; i--) {
if (ba->num_entries & 1<<i) {
BA_ORDER(index) = i;
index = BA_NEXT_INDEX(index);
}
}
}
int ba_free(int index)
int ba_free(struct ba *ba, int index)
{
int buddy, curr = index;
/* clean up the bitmap, merging any buddies */
ba.bitmap[curr].allocated = 0;
ba->bitmap[curr].allocated = 0;
/* find a slots buddy Buddy# = Slot# ^ (1 << order)
* if the buddy is also free merge them
* repeat until the buddy is not free or end of the bitmap is reached
@@ -103,16 +79,16 @@ int ba_free(int index)
} else {
break;
}
} while (curr < ba.num_entries);
} while (curr < ba->num_entries);
return 0;
}
static unsigned long ba_order(unsigned long len)
static unsigned long ba_order(struct ba *ba, unsigned long len)
{
unsigned long i;
len = (len + BA_MIN_ALLOC - 1) / BA_MIN_ALLOC;
len = (len + ba->min_alloc - 1) / ba->min_alloc;
len--;
for (i = 0; i < sizeof(len)*8; i++)
if (len >> i == 0)
@@ -120,14 +96,14 @@ static unsigned long ba_order(unsigned long len)
return i;
}
int ba_allocate(unsigned long len)
int ba_allocate(struct ba *ba, unsigned long len)
{
int curr = 0;
int end = ba.num_entries;
int end = ba->num_entries;
int best_fit = -1;
unsigned long order = ba_order(len);
unsigned long order = ba_order(ba, len);
if (order > BA_MAX_ORDER)
if (order > ba->max_order)
return -1;
/* look through the bitmap:
@@ -165,16 +141,16 @@ int ba_allocate(unsigned long len)
buddy = BA_BUDDY_INDEX(best_fit);
BA_ORDER(buddy) = BA_ORDER(best_fit);
}
ba.bitmap[best_fit].allocated = 1;
ba->bitmap[best_fit].allocated = 1;
return best_fit;
}
unsigned long ba_start_addr(int index)
unsigned long ba_start_addr(struct ba *ba, int index)
{
return BA_START_ADDR(index);
}
unsigned long ba_len(int index)
unsigned long ba_len(struct ba *ba, int index)
{
return BA_LEN(index);
}

View File

@@ -29,10 +29,31 @@
#ifndef __LINKER_BA_H
#define __LINKER_BA_H
extern void ba_init(void);
extern int ba_allocate(unsigned long len);
extern int ba_free(int index);
extern unsigned long ba_start_addr(int index);
extern unsigned long ba_len(int index);
/* One entry of the buddy allocator's bitmap, describing a region of the managed area. */
struct ba_bits {
unsigned allocated:1; /* 1 if allocated, 0 if free */
unsigned order:7; /* log2 of the region size in min_alloc units: the region spans (1 << order) entries */
};
/*
 * Descriptor for one area managed by the buddy allocator. A pointer to it
 * is passed to every ba_* function.
 */
struct ba {
/* start address of the ba space */
unsigned long base;
/* total size of the ba space */
unsigned long size;
/* the smallest allocation that can be made; request lengths are rounded
 * up to a multiple of this value */
unsigned long min_alloc;
/* the order of the largest allocation that can be made; if left at 0, or
 * set larger than the order computed from size, ba_init() replaces it
 * with that computed order */
unsigned long max_order;
/* number of entries in the ba space */
int num_entries;
/* the bitmap for the region indicating which entries are allocated
* and which are free */
struct ba_bits *bitmap;
};
/* Prepare the allocator described by ba: settles ba->max_order and seeds the
 * bitmap orders from ba->num_entries. */
extern void ba_init(struct ba *ba);
/* Allocate a region able to hold len; returns its bitmap index, or a negative
 * value on failure (e.g. when the request's order exceeds ba->max_order). */
extern int ba_allocate(struct ba *ba, unsigned long len);
/* Mark the region at index as free, merging it with free buddies. */
extern int ba_free(struct ba *ba, int index);
/* Start address of the region at index: ba->base + index * ba->min_alloc. */
extern unsigned long ba_start_addr(struct ba *ba, int index);
/* Length of the region at index: (1 << order) * ba->min_alloc. */
extern unsigned long ba_len(struct ba *ba, int index);
#endif

View File

@@ -91,6 +91,18 @@ static soinfo *sonext = &libdl_info;
static soinfo *somain; /* main process, always the one after libdl_info */
#endif
/* Set up for the buddy allocator managing the prelinked libraries. */
/* Bitmap storage: one entry per LIBINC-sized slot between LIBBASE and LIBLAST. */
static struct ba_bits ba_prelink_bitmap[(LIBLAST - LIBBASE) / LIBINC];
/* Allocator descriptor covering [LIBBASE, LIBLAST) with LIBINC granularity. */
static struct ba ba_prelink = {
.base = LIBBASE,
.size = LIBLAST - LIBBASE,
.min_alloc = LIBINC,
/* max_order will be determined automatically: it is left zero-initialized
 * here, and ba_init() fills it in from .size */
.bitmap = ba_prelink_bitmap,
.num_entries = sizeof(ba_prelink_bitmap)/sizeof(ba_prelink_bitmap[0]),
};
static inline int validate_soinfo(soinfo *si)
{
return (si >= sopool && si < sopool + SO_MAX) ||
@@ -783,14 +795,14 @@ alloc_mem_region(soinfo *si)
for it from the buddy allocator, which manages the area between
LIBBASE and LIBLAST.
*/
si->ba_index = ba_allocate(si->size);
si->ba_index = ba_allocate(&ba_prelink, si->size);
if(si->ba_index >= 0) {
si->base = ba_start_addr(si->ba_index);
si->base = ba_start_addr(&ba_prelink, si->ba_index);
PRINT("%5d mapping library '%s' at %08x (index %d) " \
"through buddy allocator.\n",
pid, si->name, si->base, si->ba_index);
if (reserve_mem_region(si) < 0) {
ba_free(si->ba_index);
ba_free(&ba_prelink, si->ba_index);
si->ba_index = -1;
si->base = 0;
goto err;
@@ -1086,7 +1098,7 @@ load_library(const char *name)
/* Now actually load the library's segments into right places in memory */
if (load_segments(fd, &__header[0], si) < 0) {
if (si->ba_index >= 0) {
ba_free(si->ba_index);
ba_free(&ba_prelink, si->ba_index);
si->ba_index = -1;
}
goto fail;
@@ -1189,7 +1201,7 @@ unsigned unload_library(soinfo *si)
PRINT("%5d releasing library '%s' address space at %08x "\
"through buddy allocator.\n",
pid, si->name, si->base);
ba_free(si->ba_index);
ba_free(&ba_prelink, si->ba_index);
}
notify_gdb_of_unload(si);
free_info(si);
@@ -1240,9 +1252,13 @@ static int reloc_library(soinfo *si, Elf32_Rel *rel, unsigned count)
return -1;
}
#endif
if ((s->st_shndx == SHN_UNDEF) && (s->st_value != 0)) {
DL_ERR("%5d In '%s', shndx=%d && value=0x%08x. We do not "
"handle this yet", pid, si->name, s->st_shndx,
// st_shndx==SHN_UNDEF means an undefined symbol.
// st_value should be 0 then, except that the low bit of st_value is
// used to indicate whether the symbol points to an ARM or thumb function,
// and should be ignored in the following check.
if ((s->st_shndx == SHN_UNDEF) && ((s->st_value & ~1) != 0)) {
DL_ERR("%5d In '%s', symbol=%s shndx=%d && value=0x%08x. We do not "
"handle this yet", pid, si->name, sym_name, s->st_shndx,
s->st_value);
return -1;
}
@@ -1893,7 +1909,7 @@ unsigned __linker_init(unsigned **elfdata)
vecs += 2;
}
ba_init();
ba_init(&ba_prelink);
si->base = 0;
si->dynamic = (unsigned *)-1;